aboutsummaryrefslogtreecommitdiff
path: root/include/net/bluetooth/bluetooth.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/bluetooth/bluetooth.h')
-rw-r--r--include/net/bluetooth/bluetooth.h259
1 files changed, 220 insertions, 39 deletions
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 718394e2c01..904777c1cd2 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -1,4 +1,4 @@
-/*
+/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (C) 2000-2001 Qualcomm Incorporated
@@ -12,30 +12,33 @@
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
- CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
- COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
+ ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+ COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
SOFTWARE IS DISCLAIMED.
*/
#ifndef __BLUETOOTH_H
#define __BLUETOOTH_H
-#include <asm/types.h>
-#include <asm/byteorder.h>
-#include <linux/list.h>
#include <linux/poll.h>
#include <net/sock.h>
+#include <linux/seq_file.h>
#ifndef AF_BLUETOOTH
#define AF_BLUETOOTH 31
#define PF_BLUETOOTH AF_BLUETOOTH
#endif
+/* Bluetooth versions */
+#define BLUETOOTH_VER_1_1 1
+#define BLUETOOTH_VER_1_2 2
+#define BLUETOOTH_VER_2_0 3
+
/* Reserved for core and driver use */
#define BT_SKB_RESERVE 8
@@ -56,17 +59,74 @@
#define BT_SECURITY 4
struct bt_security {
__u8 level;
+ __u8 key_size;
};
#define BT_SECURITY_SDP 0
#define BT_SECURITY_LOW 1
#define BT_SECURITY_MEDIUM 2
#define BT_SECURITY_HIGH 3
+#define BT_SECURITY_FIPS 4
#define BT_DEFER_SETUP 7
-#define BT_INFO(fmt, arg...) printk(KERN_INFO "Bluetooth: " fmt "\n" , ## arg)
-#define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __func__ , ## arg)
-#define BT_DBG(fmt, arg...) pr_debug("%s: " fmt "\n" , __func__ , ## arg)
+#define BT_FLUSHABLE 8
+
+#define BT_FLUSHABLE_OFF 0
+#define BT_FLUSHABLE_ON 1
+
+#define BT_POWER 9
+struct bt_power {
+ __u8 force_active;
+};
+#define BT_POWER_FORCE_ACTIVE_OFF 0
+#define BT_POWER_FORCE_ACTIVE_ON 1
+
+#define BT_CHANNEL_POLICY 10
+
+/* BR/EDR only (default policy)
+ * AMP controllers cannot be used.
+ * Channel move requests from the remote device are denied.
+ * If the L2CAP channel is currently using AMP, move the channel to BR/EDR.
+ */
+#define BT_CHANNEL_POLICY_BREDR_ONLY 0
+
+/* BR/EDR Preferred
+ * Allow use of AMP controllers.
+ * If the L2CAP channel is currently on AMP, move it to BR/EDR.
+ * Channel move requests from the remote device are allowed.
+ */
+#define BT_CHANNEL_POLICY_BREDR_PREFERRED 1
+
+/* AMP Preferred
+ * Allow use of AMP controllers
+ * If the L2CAP channel is currently on BR/EDR and AMP controller
+ * resources are available, initiate a channel move to AMP.
+ * Channel move requests from the remote device are allowed.
+ * If the L2CAP socket has not been connected yet, try to create
+ * and configure the channel directly on an AMP controller rather
+ * than BR/EDR.
+ */
+#define BT_CHANNEL_POLICY_AMP_PREFERRED 2
+
+#define BT_VOICE 11
+struct bt_voice {
+ __u16 setting;
+};
+
+#define BT_VOICE_TRANSPARENT 0x0003
+#define BT_VOICE_CVSD_16BIT 0x0060
+
+#define BT_SNDMTU 12
+#define BT_RCVMTU 13
+
+__printf(1, 2)
+int bt_info(const char *fmt, ...);
+__printf(1, 2)
+int bt_err(const char *fmt, ...);
+
+#define BT_INFO(fmt, ...) bt_info(fmt "\n", ##__VA_ARGS__)
+#define BT_ERR(fmt, ...) bt_err(fmt "\n", ##__VA_ARGS__)
+#define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__)
/* Connection and socket states */
enum {
@@ -81,27 +141,80 @@ enum {
BT_CLOSED
};
+/* If unused will be removed by compiler */
+static inline const char *state_to_string(int state)
+{
+ switch (state) {
+ case BT_CONNECTED:
+ return "BT_CONNECTED";
+ case BT_OPEN:
+ return "BT_OPEN";
+ case BT_BOUND:
+ return "BT_BOUND";
+ case BT_LISTEN:
+ return "BT_LISTEN";
+ case BT_CONNECT:
+ return "BT_CONNECT";
+ case BT_CONNECT2:
+ return "BT_CONNECT2";
+ case BT_CONFIG:
+ return "BT_CONFIG";
+ case BT_DISCONN:
+ return "BT_DISCONN";
+ case BT_CLOSED:
+ return "BT_CLOSED";
+ }
+
+ return "invalid state";
+}
+
/* BD Address */
typedef struct {
__u8 b[6];
-} __attribute__((packed)) bdaddr_t;
+} __packed bdaddr_t;
+
+/* BD Address type */
+#define BDADDR_BREDR 0x00
+#define BDADDR_LE_PUBLIC 0x01
+#define BDADDR_LE_RANDOM 0x02
+
+static inline bool bdaddr_type_is_valid(__u8 type)
+{
+ switch (type) {
+ case BDADDR_BREDR:
+ case BDADDR_LE_PUBLIC:
+ case BDADDR_LE_RANDOM:
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool bdaddr_type_is_le(__u8 type)
+{
+ switch (type) {
+ case BDADDR_LE_PUBLIC:
+ case BDADDR_LE_RANDOM:
+ return true;
+ }
+
+ return false;
+}
-#define BDADDR_ANY (&(bdaddr_t) {{0, 0, 0, 0, 0, 0}})
-#define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff}})
+#define BDADDR_ANY (&(bdaddr_t) {{0, 0, 0, 0, 0, 0}})
+#define BDADDR_NONE (&(bdaddr_t) {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}})
/* Copy, swap, convert BD Address */
-static inline int bacmp(bdaddr_t *ba1, bdaddr_t *ba2)
+static inline int bacmp(const bdaddr_t *ba1, const bdaddr_t *ba2)
{
return memcmp(ba1, ba2, sizeof(bdaddr_t));
}
-static inline void bacpy(bdaddr_t *dst, bdaddr_t *src)
+static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src)
{
memcpy(dst, src, sizeof(bdaddr_t));
}
void baswap(bdaddr_t *dst, bdaddr_t *src);
-char *batostr(bdaddr_t *ba);
-bdaddr_t *strtoba(char *str);
/* Common socket structures and functions */
@@ -109,38 +222,75 @@ bdaddr_t *strtoba(char *str);
struct bt_sock {
struct sock sk;
- bdaddr_t src;
- bdaddr_t dst;
struct list_head accept_q;
struct sock *parent;
- u32 defer_setup;
+ unsigned long flags;
+ void (*skb_msg_name)(struct sk_buff *, void *, int *);
+};
+
+enum {
+ BT_SK_DEFER_SETUP,
+ BT_SK_SUSPEND,
};
struct bt_sock_list {
struct hlist_head head;
rwlock_t lock;
+#ifdef CONFIG_PROC_FS
+ int (* custom_seq_show)(struct seq_file *, void *);
+#endif
};
-int bt_sock_register(int proto, struct net_proto_family *ops);
-int bt_sock_unregister(int proto);
+int bt_sock_register(int proto, const struct net_proto_family *ops);
+void bt_sock_unregister(int proto);
void bt_sock_link(struct bt_sock_list *l, struct sock *s);
void bt_sock_unlink(struct bt_sock_list *l, struct sock *s);
-int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags);
-uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait);
+int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *msg, size_t len, int flags);
+int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *msg, size_t len, int flags);
+uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
+int bt_sock_wait_ready(struct sock *sk, unsigned long flags);
void bt_accept_enqueue(struct sock *parent, struct sock *sk);
void bt_accept_unlink(struct sock *sk);
struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
/* Skb helpers */
+struct l2cap_ctrl {
+ unsigned int sframe:1,
+ poll:1,
+ final:1,
+ fcs:1,
+ sar:2,
+ super:2;
+ __u16 reqseq;
+ __u16 txseq;
+ __u8 retries;
+};
+
+struct hci_dev;
+
+typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status);
+
+struct hci_req_ctrl {
+ bool start;
+ u8 event;
+ hci_req_complete_t complete;
+};
+
struct bt_skb_cb {
__u8 pkt_type;
__u8 incoming;
- __u8 tx_seq;
- __u8 retries;
- __u8 sar;
+ __u16 expect;
+ __u8 force_active;
+ struct l2cap_chan *chan;
+ struct l2cap_ctrl control;
+ struct hci_req_ctrl req;
+ bdaddr_t bdaddr;
+ __le16 psm;
};
#define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
@@ -148,34 +298,65 @@ static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how)
{
struct sk_buff *skb;
- if ((skb = alloc_skb(len + BT_SKB_RESERVE, how))) {
+ skb = alloc_skb(len + BT_SKB_RESERVE, how);
+ if (skb) {
skb_reserve(skb, BT_SKB_RESERVE);
bt_cb(skb)->incoming = 0;
}
return skb;
}
-static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long len,
- int nb, int *err)
+static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk,
+ unsigned long len, int nb, int *err)
{
struct sk_buff *skb;
- if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) {
+ skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err);
+ if (skb) {
skb_reserve(skb, BT_SKB_RESERVE);
bt_cb(skb)->incoming = 0;
}
+ if (!skb && *err)
+ return NULL;
+
+ *err = sock_error(sk);
+ if (*err)
+ goto out;
+
+ if (sk->sk_shutdown) {
+ *err = -ECONNRESET;
+ goto out;
+ }
+
return skb;
+
+out:
+ kfree_skb(skb);
+ return NULL;
}
-int bt_err(__u16 code);
+int bt_to_errno(__u16 code);
+
+int hci_sock_init(void);
+void hci_sock_cleanup(void);
+
+int bt_sysfs_init(void);
+void bt_sysfs_cleanup(void);
+
+int bt_procfs_init(struct net *net, const char *name,
+ struct bt_sock_list *sk_list,
+ int (*seq_show)(struct seq_file *, void *));
+void bt_procfs_cleanup(struct net *net, const char *name);
+
+extern struct dentry *bt_debugfs;
-extern int hci_sock_init(void);
-extern void hci_sock_cleanup(void);
+int l2cap_init(void);
+void l2cap_exit(void);
-extern int bt_sysfs_init(void);
-extern void bt_sysfs_cleanup(void);
+int sco_init(void);
+void sco_exit(void);
-extern struct class *bt_class;
+void bt_sock_reclassify_lock(struct sock *sk, int proto);
#endif /* __BLUETOOTH_H */
-rw-r--r--fs/xfs/time.h (renamed from fs/xfs/linux-2.6/time.h)0
-rw-r--r--fs/xfs/uuid.c (renamed from fs/xfs/support/uuid.c)80
-rw-r--r--fs/xfs/uuid.h (renamed from fs/xfs/support/uuid.h)11
-rw-r--r--fs/xfs/xfs.h15
-rw-r--r--fs/xfs/xfs_acl.c995
-rw-r--r--fs/xfs/xfs_acl.h113
-rw-r--r--fs/xfs/xfs_ag.h174
-rw-r--r--fs/xfs/xfs_alloc.c1887
-rw-r--r--fs/xfs/xfs_alloc.h131
-rw-r--r--fs/xfs/xfs_alloc_btree.c2473
-rw-r--r--fs/xfs/xfs_alloc_btree.h154
-rw-r--r--fs/xfs/xfs_aops.c1770
-rw-r--r--fs/xfs/xfs_aops.h (renamed from fs/xfs/linux-2.6/xfs_aops.h)33
-rw-r--r--fs/xfs/xfs_arch.h236
-rw-r--r--fs/xfs/xfs_attr.c1964
-rw-r--r--fs/xfs/xfs_attr.h112
-rw-r--r--fs/xfs/xfs_attr_inactive.c452
-rw-r--r--fs/xfs/xfs_attr_leaf.c3127
-rw-r--r--fs/xfs/xfs_attr_leaf.h232
-rw-r--r--fs/xfs/xfs_attr_list.c653
-rw-r--r--fs/xfs/xfs_attr_remote.c628
-rw-r--r--fs/xfs/xfs_attr_remote.h (renamed from fs/xfs/linux-2.6/mutex.h)14
-rw-r--r--fs/xfs/xfs_attr_sf.h54
-rw-r--r--fs/xfs/xfs_behavior.c203
-rw-r--r--fs/xfs/xfs_behavior.h190
-rw-r--r--fs/xfs/xfs_bit.c198
-rw-r--r--fs/xfs/xfs_bit.h50
-rw-r--r--fs/xfs/xfs_bmap.c8321
-rw-r--r--fs/xfs/xfs_bmap.h381
-rw-r--r--fs/xfs/xfs_bmap_btree.c3134
-rw-r--r--fs/xfs/xfs_bmap_btree.h382
-rw-r--r--fs/xfs/xfs_bmap_util.c1897
-rw-r--r--fs/xfs/xfs_bmap_util.h112
-rw-r--r--fs/xfs/xfs_btree.c4278
-rw-r--r--fs/xfs/xfs_btree.h501
-rw-r--r--fs/xfs/xfs_buf.c1890
-rw-r--r--fs/xfs/xfs_buf.h393
-rw-r--r--fs/xfs/xfs_buf_item.c1400
-rw-r--r--fs/xfs/xfs_buf_item.h117
-rw-r--r--fs/xfs/xfs_cap.h70
-rw-r--r--fs/xfs/xfs_cksum.h63
-rw-r--r--fs/xfs/xfs_clnt.h104
-rw-r--r--fs/xfs/xfs_da_btree.c2889
-rw-r--r--fs/xfs/xfs_da_btree.h219
-rw-r--r--fs/xfs/xfs_da_format.c911
-rw-r--r--fs/xfs/xfs_da_format.h861
-rw-r--r--fs/xfs/xfs_dfrag.c375
-rw-r--r--fs/xfs/xfs_dfrag.h53
-rw-r--r--fs/xfs/xfs_dinode.h302
-rw-r--r--fs/xfs/xfs_dir.c1219
-rw-r--r--fs/xfs/xfs_dir.h142
-rw-r--r--fs/xfs/xfs_dir2.c1102
-rw-r--r--fs/xfs/xfs_dir2.h190
-rw-r--r--fs/xfs/xfs_dir2_block.c1225
-rw-r--r--fs/xfs/xfs_dir2_block.h96
-rw-r--r--fs/xfs/xfs_dir2_data.c841
-rw-r--r--fs/xfs/xfs_dir2_data.h190
-rw-r--r--fs/xfs/xfs_dir2_leaf.c1969
-rw-r--r--fs/xfs/xfs_dir2_leaf.h271
-rw-r--r--fs/xfs/xfs_dir2_node.c1940
-rw-r--r--fs/xfs/xfs_dir2_node.h104
-rw-r--r--fs/xfs/xfs_dir2_priv.h274
-rw-r--r--fs/xfs/xfs_dir2_readdir.c700
-rw-r--r--fs/xfs/xfs_dir2_sf.c712
-rw-r--r--fs/xfs/xfs_dir2_sf.h195
-rw-r--r--fs/xfs/xfs_dir2_trace.c216
-rw-r--r--fs/xfs/xfs_dir2_trace.h72
-rw-r--r--fs/xfs/xfs_dir_leaf.c2213
-rw-r--r--fs/xfs/xfs_dir_leaf.h231
-rw-r--r--fs/xfs/xfs_dir_sf.h153
-rw-r--r--fs/xfs/xfs_discard.c240
-rw-r--r--fs/xfs/xfs_discard.h10
-rw-r--r--fs/xfs/xfs_dmapi.h204
-rw-r--r--fs/xfs/xfs_dquot.c1105
-rw-r--r--fs/xfs/xfs_dquot.h173
-rw-r--r--fs/xfs/xfs_dquot_buf.c290
-rw-r--r--fs/xfs/xfs_dquot_item.c445
-rw-r--r--fs/xfs/xfs_dquot_item.h (renamed from fs/xfs/quota/xfs_dquot_item.h)7
-rw-r--r--fs/xfs/xfs_error.c227
-rw-r--r--fs/xfs/xfs_error.h61
-rw-r--r--fs/xfs/xfs_export.c249
-rw-r--r--fs/xfs/xfs_export.h (renamed from fs/xfs/linux-2.6/xfs_export.h)52
-rw-r--r--fs/xfs/xfs_extent_busy.c605
-rw-r--r--fs/xfs/xfs_extent_busy.h73
-rw-r--r--fs/xfs/xfs_extfree_item.c593
-rw-r--r--fs/xfs/xfs_extfree_item.h64
-rw-r--r--fs/xfs/xfs_file.c1433
-rw-r--r--fs/xfs/xfs_filestream.c434
-rw-r--r--fs/xfs/xfs_filestream.h40
-rw-r--r--fs/xfs/xfs_format.h428
-rw-r--r--fs/xfs/xfs_fs.h171
-rw-r--r--fs/xfs/xfs_fsops.c652
-rw-r--r--fs/xfs/xfs_fsops.h2
-rw-r--r--fs/xfs/xfs_globals.c (renamed from fs/xfs/linux-2.6/xfs_globals.c)21
-rw-r--r--fs/xfs/xfs_ialloc.c2649
-rw-r--r--fs/xfs/xfs_ialloc.h87
-rw-r--r--fs/xfs/xfs_ialloc_btree.c2260
-rw-r--r--fs/xfs/xfs_ialloc_btree.h172
-rw-r--r--fs/xfs/xfs_icache.c1327
-rw-r--r--fs/xfs/xfs_icache.h104
-rw-r--r--fs/xfs/xfs_icreate_item.c189
-rw-r--r--fs/xfs/xfs_icreate_item.h (renamed from fs/xfs/linux-2.6/xfs_ioctl32.h)22
-rw-r--r--fs/xfs/xfs_iget.c1043
-rw-r--r--fs/xfs/xfs_imap.h40
-rw-r--r--fs/xfs/xfs_inode.c5183
-rw-r--r--fs/xfs/xfs_inode.h668
-rw-r--r--fs/xfs/xfs_inode_buf.c479
-rw-r--r--fs/xfs/xfs_inode_buf.h50
-rw-r--r--fs/xfs/xfs_inode_fork.c1906
-rw-r--r--fs/xfs/xfs_inode_fork.h171
-rw-r--r--fs/xfs/xfs_inode_item.c1298
-rw-r--r--fs/xfs/xfs_inode_item.h148
-rw-r--r--fs/xfs/xfs_inum.h17
-rw-r--r--fs/xfs/xfs_iocore.c118
-rw-r--r--fs/xfs/xfs_ioctl.c1810
-rw-r--r--fs/xfs/xfs_ioctl.h95
-rw-r--r--fs/xfs/xfs_ioctl32.c681
-rw-r--r--fs/xfs/xfs_ioctl32.h237
-rw-r--r--fs/xfs/xfs_iomap.c1103
-rw-r--r--fs/xfs/xfs_iomap.h72
-rw-r--r--fs/xfs/xfs_iops.c1299
-rw-r--r--fs/xfs/xfs_iops.h (renamed from fs/xfs/linux-2.6/xfs_iops.h)25
-rw-r--r--fs/xfs/xfs_itable.c609
-rw-r--r--fs/xfs/xfs_itable.h55
-rw-r--r--fs/xfs/xfs_linux.h (renamed from fs/xfs/linux-2.6/xfs_linux.h)292
-rw-r--r--fs/xfs/xfs_log.c4092
-rw-r--r--fs/xfs/xfs_log.h221
-rw-r--r--fs/xfs/xfs_log_cil.c972
-rw-r--r--fs/xfs/xfs_log_format.h679
-rw-r--r--fs/xfs/xfs_log_priv.h618
-rw-r--r--fs/xfs/xfs_log_recover.c3883
-rw-r--r--fs/xfs/xfs_log_recover.h25
-rw-r--r--fs/xfs/xfs_log_rlimit.c150
-rw-r--r--fs/xfs/xfs_mac.h106
-rw-r--r--fs/xfs/xfs_message.c114
-rw-r--r--fs/xfs/xfs_message.h64
-rw-r--r--fs/xfs/xfs_mount.c2461
-rw-r--r--fs/xfs/xfs_mount.h593
-rw-r--r--fs/xfs/xfs_mru_cache.c551
-rw-r--r--fs/xfs/xfs_mru_cache.h46
-rw-r--r--fs/xfs/xfs_qm.c1966
-rw-r--r--fs/xfs/xfs_qm.h180
-rw-r--r--fs/xfs/xfs_qm_bhv.c151
-rw-r--r--fs/xfs/xfs_qm_syscalls.c1007
-rw-r--r--fs/xfs/xfs_qmops.c128
-rw-r--r--fs/xfs/xfs_quota.h398
-rw-r--r--fs/xfs/xfs_quota_defs.h161
-rw-r--r--fs/xfs/xfs_quotaops.c175
-rw-r--r--fs/xfs/xfs_refcache.h52
-rw-r--r--fs/xfs/xfs_rename.c635
-rw-r--r--fs/xfs/xfs_rtalloc.c1880
-rw-r--r--fs/xfs/xfs_rtalloc.h112
-rw-r--r--fs/xfs/xfs_rtbitmap.c973
-rw-r--r--fs/xfs/xfs_rw.c341
-rw-r--r--fs/xfs/xfs_rw.h97
-rw-r--r--fs/xfs/xfs_sb.c836
-rw-r--r--fs/xfs/xfs_sb.h537
-rw-r--r--fs/xfs/xfs_shared.h246
-rw-r--r--fs/xfs/xfs_stats.c198
-rw-r--r--fs/xfs/xfs_stats.h (renamed from fs/xfs/linux-2.6/xfs_stats.h)103
-rw-r--r--fs/xfs/xfs_super.c1809
-rw-r--r--fs/xfs/xfs_super.h (renamed from fs/xfs/linux-2.6/xfs_super.h)65
-rw-r--r--fs/xfs/xfs_symlink.c599
-rw-r--r--fs/xfs/xfs_symlink.h (renamed from fs/xfs/linux-2.6/xfs_version.h)20
-rw-r--r--fs/xfs/xfs_symlink_remote.c201
-rw-r--r--fs/xfs/xfs_sysctl.c261
-rw-r--r--fs/xfs/xfs_sysctl.h (renamed from fs/xfs/linux-2.6/xfs_sysctl.h)12
-rw-r--r--fs/xfs/xfs_trace.c56
-rw-r--r--fs/xfs/xfs_trace.h2073
-rw-r--r--fs/xfs/xfs_trans.c1255
-rw-r--r--fs/xfs/xfs_trans.h1001
-rw-r--r--fs/xfs/xfs_trans_ail.c1088
-rw-r--r--fs/xfs/xfs_trans_buf.c1005
-rw-r--r--fs/xfs/xfs_trans_dquot.c (renamed from fs/xfs/quota/xfs_trans_dquot.c)416
-rw-r--r--fs/xfs/xfs_trans_extfree.c41
-rw-r--r--fs/xfs/xfs_trans_inode.c293
-rw-r--r--fs/xfs/xfs_trans_item.c538
-rw-r--r--fs/xfs/xfs_trans_priv.h152
-rw-r--r--fs/xfs/xfs_trans_resv.c894
-rw-r--r--fs/xfs/xfs_trans_resv.h117
-rw-r--r--fs/xfs/xfs_trans_space.h21
-rw-r--r--fs/xfs/xfs_types.h109
-rw-r--r--fs/xfs/xfs_utils.c472
-rw-r--r--fs/xfs/xfs_utils.h38
-rw-r--r--fs/xfs/xfs_vfsops.c1987
-rw-r--r--fs/xfs/xfs_vnode.h (renamed from fs/xfs/xfs_dmops.c)47
-rw-r--r--fs/xfs/xfs_vnodeops.c4682
-rw-r--r--fs/xfs/xfs_xattr.c245
237 files changed, 75393 insertions, 79811 deletions
diff --git a/fs/xfs/Kbuild b/fs/xfs/Kbuild
deleted file mode 100644
index 2566e96706f..00000000000
--- a/fs/xfs/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-#
-# The xfs people like to share Makefile with 2.6 and 2.4.
-# Utilise file named Kbuild file which has precedence over Makefile.
-#
-
-include $(srctree)/$(obj)/Makefile-linux-2.6
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index bac27d66151..399e8cec6e6 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,6 +1,8 @@
config XFS_FS
tristate "XFS filesystem support"
- select EXPORTFS if NFSD!=n
+ depends on BLOCK
+ select EXPORTFS
+ select LIBCRC32C
help
XFS is a high performance journaling filesystem which originated
on the SGI IRIX platform. It is completely multi-threaded, can
@@ -18,14 +20,10 @@ config XFS_FS
system of your root partition is compiled as a module, you'll need
to use an initial ramdisk (initrd) to boot.
-config XFS_EXPORT
- bool
- depends on XFS_FS && EXPORTFS
- default y
-
config XFS_QUOTA
bool "XFS Quota support"
depends on XFS_FS
+ select QUOTACTL
help
If you say Y here, you will be able to set limits for disk usage on
a per user and/or a per group basis under XFS. XFS considers quota
@@ -40,21 +38,10 @@ config XFS_QUOTA
with or without the generic quota support enabled (CONFIG_QUOTA) -
they are completely independent subsystems.
-config XFS_SECURITY
- bool "XFS Security Label support"
- depends on XFS_FS
- help
- Security labels support alternative access control models
- implemented by security modules like SELinux. This option
- enables an extended attribute namespace for inode security
- labels in the XFS filesystem.
-
- If you are not using a security module that requires using
- extended attributes for inode security labels, say N.
-
config XFS_POSIX_ACL
bool "XFS POSIX ACL support"
depends on XFS_FS
+ select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
@@ -65,18 +52,45 @@ config XFS_POSIX_ACL
If you don't know what Access Control Lists are, say N.
config XFS_RT
- bool "XFS Realtime support (EXPERIMENTAL)"
- depends on XFS_FS && EXPERIMENTAL
+ bool "XFS Realtime subvolume support"
+ depends on XFS_FS
help
If you say Y here you will be able to mount and use XFS filesystems
- which contain a realtime subvolume. The realtime subvolume is a
- separate area of disk space where only file data is stored. The
- realtime subvolume is designed to provide very deterministic
- data rates suitable for media streaming applications.
+ which contain a realtime subvolume. The realtime subvolume is a
+ separate area of disk space where only file data is stored. It was
+ originally designed to provide deterministic data rates suitable
+ for media streaming applications, but is also useful as a generic
+ mechanism for ensuring data and metadata/log I/Os are completely
+ separated. Regular file I/Os are isolated to a separate device
+ from all other requests, and this can be done quite transparently
+ to applications via the inherit-realtime directory inode flag.
- See the xfs man page in section 5 for a bit more information.
-
- This feature is unsupported at this time, is not yet fully
- functional, and may cause serious problems.
+ See the xfs man page in section 5 for additional information.
If unsure, say N.
+
+config XFS_WARN
+ bool "XFS Verbose Warnings"
+ depends on XFS_FS && !XFS_DEBUG
+ help
+ Say Y here to get an XFS build with many additional warnings.
+ It converts ASSERT checks to WARN, so will log any out-of-bounds
+ conditions that occur that would otherwise be missed. It is much
+ lighter weight than XFS_DEBUG and does not modify algorithms and will
+ not cause the kernel to panic on non-fatal errors.
+
+ However, similar to XFS_DEBUG, it is only advisable to use this if you
+ are debugging a particular problem.
+
+config XFS_DEBUG
+ bool "XFS Debugging support"
+ depends on XFS_FS
+ help
+ Say Y here to get an XFS build with many debugging features,
+ including ASSERT checks, function wrappers around macros,
+ and extra sanity-checking functions in various code paths.
+
+ Note that the resulting code will be HUGE and SLOW, and probably
+ not useful unless you are debugging a particular problem.
+
+ Say N unless you are an XFS developer, or you play one on TV.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 49e3e7e5e3d..c21f4350666 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -1 +1,116 @@
-include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL)
+#
+# Copyright (c) 2000-2005 Silicon Graphics, Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+ccflags-y += -I$(src) # needed for trace events
+
+ccflags-$(CONFIG_XFS_DEBUG) += -g
+
+obj-$(CONFIG_XFS_FS) += xfs.o
+
+# this one should be compiled first, as the tracing macros can easily blow up
+xfs-y += xfs_trace.o
+
+# highlevel code
+xfs-y += xfs_aops.o \
+ xfs_attr_inactive.o \
+ xfs_attr_list.o \
+ xfs_bit.o \
+ xfs_bmap_util.o \
+ xfs_buf.o \
+ xfs_dir2_readdir.o \
+ xfs_discard.o \
+ xfs_error.o \
+ xfs_export.o \
+ xfs_extent_busy.o \
+ xfs_file.o \
+ xfs_filestream.o \
+ xfs_fsops.o \
+ xfs_globals.o \
+ xfs_icache.o \
+ xfs_ioctl.o \
+ xfs_iomap.o \
+ xfs_iops.o \
+ xfs_itable.o \
+ xfs_message.o \
+ xfs_mount.o \
+ xfs_mru_cache.o \
+ xfs_super.o \
+ xfs_symlink.o \
+ xfs_trans.o \
+ xfs_xattr.o \
+ kmem.o \
+ uuid.o
+
+# code shared with libxfs
+xfs-y += xfs_alloc.o \
+ xfs_alloc_btree.o \
+ xfs_attr.o \
+ xfs_attr_leaf.o \
+ xfs_attr_remote.o \
+ xfs_bmap.o \
+ xfs_bmap_btree.o \
+ xfs_btree.o \
+ xfs_da_btree.o \
+ xfs_da_format.o \
+ xfs_dir2.o \
+ xfs_dir2_block.o \
+ xfs_dir2_data.o \
+ xfs_dir2_leaf.o \
+ xfs_dir2_node.o \
+ xfs_dir2_sf.o \
+ xfs_dquot_buf.o \
+ xfs_ialloc.o \
+ xfs_ialloc_btree.o \
+ xfs_icreate_item.o \
+ xfs_inode.o \
+ xfs_inode_fork.o \
+ xfs_inode_buf.o \
+ xfs_log_recover.o \
+ xfs_log_rlimit.o \
+ xfs_sb.o \
+ xfs_symlink_remote.o \
+ xfs_trans_resv.o
+
+# low-level transaction/log code
+xfs-y += xfs_log.o \
+ xfs_log_cil.o \
+ xfs_buf_item.o \
+ xfs_extfree_item.o \
+ xfs_inode_item.o \
+ xfs_trans_ail.o \
+ xfs_trans_buf.o \
+ xfs_trans_extfree.o \
+ xfs_trans_inode.o \
+
+# optional features
+xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
+ xfs_dquot_item.o \
+ xfs_trans_dquot.o \
+ xfs_qm_syscalls.o \
+ xfs_qm_bhv.o \
+ xfs_qm.o \
+ xfs_quotaops.o
+
+# xfs_rtbitmap is shared with libxfs
+xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \
+ xfs_rtbitmap.o
+
+xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
+xfs-$(CONFIG_PROC_FS) += xfs_stats.o
+xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
+xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
deleted file mode 100644
index 97bd4743b46..00000000000
--- a/fs/xfs/Makefile-linux-2.6
+++ /dev/null
@@ -1,151 +0,0 @@
-#
-# Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like. Any license provided herein, whether implied or
-# otherwise, applies only to this software file. Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write the Free Software Foundation, Inc., 59
-# Temple Place - Suite 330, Boston MA 02111-1307, USA.
-#
-# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
-# Mountain View, CA 94043, or:
-#
-# http://www.sgi.com
-#
-# For further information regarding this notice, see:
-#
-# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
-#
-
-EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
-
-XFS_LINUX := linux-2.6
-
-ifeq ($(CONFIG_XFS_DEBUG),y)
- EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
- EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
-endif
-ifeq ($(CONFIG_XFS_TRACE),y)
- EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
- EXTRA_CFLAGS += -DXFS_ATTR_TRACE
- EXTRA_CFLAGS += -DXFS_BLI_TRACE
- EXTRA_CFLAGS += -DXFS_BMAP_TRACE
- EXTRA_CFLAGS += -DXFS_BMBT_TRACE
- EXTRA_CFLAGS += -DXFS_DIR_TRACE
- EXTRA_CFLAGS += -DXFS_DIR2_TRACE
- EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
- EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
- EXTRA_CFLAGS += -DXFS_LOG_TRACE
- EXTRA_CFLAGS += -DXFS_RW_TRACE
- EXTRA_CFLAGS += -DPAGEBUF_TRACE
- EXTRA_CFLAGS += -DXFS_VNODE_TRACE
-endif
-
-obj-$(CONFIG_XFS_FS) += xfs.o
-
-xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
- xfs_dquot.o \
- xfs_dquot_item.o \
- xfs_trans_dquot.o \
- xfs_qm_syscalls.o \
- xfs_qm_bhv.o \
- xfs_qm.o)
-
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
-xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
-xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
-xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o
-
-
-xfs-y += xfs_alloc.o \
- xfs_alloc_btree.o \
- xfs_attr.o \
- xfs_attr_leaf.o \
- xfs_behavior.o \
- xfs_bit.o \
- xfs_bmap.o \
- xfs_bmap_btree.o \
- xfs_btree.o \
- xfs_buf_item.o \
- xfs_da_btree.o \
- xfs_dir.o \
- xfs_dir2.o \
- xfs_dir2_block.o \
- xfs_dir2_data.o \
- xfs_dir2_leaf.o \
- xfs_dir2_node.o \
- xfs_dir2_sf.o \
- xfs_dir_leaf.o \
- xfs_error.o \
- xfs_extfree_item.o \
- xfs_fsops.o \
- xfs_ialloc.o \
- xfs_ialloc_btree.o \
- xfs_iget.o \
- xfs_inode.o \
- xfs_inode_item.o \
- xfs_iocore.o \
- xfs_iomap.o \
- xfs_itable.o \
- xfs_dfrag.o \
- xfs_log.o \
- xfs_log_recover.o \
- xfs_mount.o \
- xfs_rename.o \
- xfs_trans.o \
- xfs_trans_ail.o \
- xfs_trans_buf.o \
- xfs_trans_extfree.o \
- xfs_trans_inode.o \
- xfs_trans_item.o \
- xfs_utils.o \
- xfs_vfsops.o \
- xfs_vnodeops.o \
- xfs_rw.o \
- xfs_dmops.o \
- xfs_qmops.o
-
-xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
-
-# Objects in linux/
-xfs-y += $(addprefix $(XFS_LINUX)/, \
- kmem.o \
- xfs_aops.o \
- xfs_buf.o \
- xfs_file.o \
- xfs_fs_subr.o \
- xfs_globals.o \
- xfs_ioctl.o \
- xfs_iops.o \
- xfs_lrw.o \
- xfs_super.o \
- xfs_vfs.o \
- xfs_vnode.o)
-
-# Objects in support/
-xfs-y += $(addprefix support/, \
- debug.o \
- move.o \
- uuid.o)
-
-xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
-
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
new file mode 100644
index 00000000000..844e288b957
--- /dev/null
+++ b/fs/xfs/kmem.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include "time.h"
+#include "kmem.h"
+#include "xfs_message.h"
+
+/*
+ * Greedy allocation. May fail and may return vmalloced memory.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+ void *ptr;
+ size_t kmsize = maxsize;
+
+ while (!(ptr = vzalloc(kmsize))) {
+ if ((kmsize >>= 1) <= minsize)
+ kmsize = minsize;
+ }
+ if (ptr)
+ *size = kmsize;
+ return ptr;
+}
+
+void *
+kmem_alloc(size_t size, xfs_km_flags_t flags)
+{
+ int retries = 0;
+ gfp_t lflags = kmem_flags_convert(flags);
+ void *ptr;
+
+ do {
+ ptr = kmalloc(size, lflags);
+ if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ return ptr;
+ if (!(++retries % 100))
+ xfs_err(NULL,
+ "possible memory allocation deadlock in %s (mode:0x%x)",
+ __func__, lflags);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ } while (1);
+}
+
+void *
+kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+{
+ unsigned noio_flag = 0;
+ void *ptr;
+ gfp_t lflags;
+
+ ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
+ if (ptr)
+ return ptr;
+
+ /*
+ * __vmalloc() will allocate data pages and auxiliary structures (e.g.
+ * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
+ * here. Hence we need to tell memory reclaim that we are in such a
+ * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
+ * the filesystem here and potentially deadlocking.
+ */
+ if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+ noio_flag = memalloc_noio_save();
+
+ lflags = kmem_flags_convert(flags);
+ ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+
+ if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+ memalloc_noio_restore(noio_flag);
+
+ return ptr;
+}
+
+void
+kmem_free(const void *ptr)
+{
+ if (!is_vmalloc_addr(ptr)) {
+ kfree(ptr);
+ } else {
+ vfree(ptr);
+ }
+}
+
+void *
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
+ xfs_km_flags_t flags)
+{
+ void *new;
+
+ new = kmem_alloc(newsize, flags);
+ if (ptr) {
+ if (new)
+ memcpy(new, ptr,
+ ((oldsize < newsize) ? oldsize : newsize));
+ kmem_free(ptr);
+ }
+ return new;
+}
+
+void *
+kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
+{
+ int retries = 0;
+ gfp_t lflags = kmem_flags_convert(flags);
+ void *ptr;
+
+ do {
+ ptr = kmem_cache_alloc(zone, lflags);
+ if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ return ptr;
+ if (!(++retries % 100))
+ xfs_err(NULL,
+ "possible memory allocation deadlock in %s (mode:0x%x)",
+ __func__, lflags);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ } while (1);
+}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
new file mode 100644
index 00000000000..64db0e53ede
--- /dev/null
+++ b/fs/xfs/kmem.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_SUPPORT_KMEM_H__
+#define __XFS_SUPPORT_KMEM_H__
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+/*
+ * General memory allocation interfaces
+ */
+
+typedef unsigned __bitwise xfs_km_flags_t;
+#define KM_SLEEP ((__force xfs_km_flags_t)0x0001u)
+#define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u)
+#define KM_NOFS ((__force xfs_km_flags_t)0x0004u)
+#define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u)
+#define KM_ZERO ((__force xfs_km_flags_t)0x0010u)
+
+/*
+ * We use a special process flag to avoid recursive callbacks into
+ * the filesystem during transactions. We will also issue our own
+ * warnings, so we explicitly skip any generic ones (silly of us).
+ */
+static inline gfp_t
+kmem_flags_convert(xfs_km_flags_t flags)
+{
+ gfp_t lflags;
+
+ BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_ZERO));
+
+ if (flags & KM_NOSLEEP) {
+ lflags = GFP_ATOMIC | __GFP_NOWARN;
+ } else {
+ lflags = GFP_KERNEL | __GFP_NOWARN;
+ if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+ lflags &= ~__GFP_FS;
+ }
+
+ if (flags & KM_ZERO)
+ lflags |= __GFP_ZERO;
+
+ return lflags;
+}
+
+extern void *kmem_alloc(size_t, xfs_km_flags_t);
+extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
+extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);
+extern void kmem_free(const void *);
+
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
+static inline void *
+kmem_zalloc(size_t size, xfs_km_flags_t flags)
+{
+ return kmem_alloc(size, flags | KM_ZERO);
+}
+
+/*
+ * Zone interfaces
+ */
+
+#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN
+#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT
+#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
+
+#define kmem_zone kmem_cache
+#define kmem_zone_t struct kmem_cache
+
+static inline kmem_zone_t *
+kmem_zone_init(int size, char *zone_name)
+{
+ return kmem_cache_create(zone_name, size, 0, 0, NULL);
+}
+
+static inline kmem_zone_t *
+kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
+ void (*construct)(void *))
+{
+ return kmem_cache_create(zone_name, size, 0, flags, construct);
+}
+
+static inline void
+kmem_zone_free(kmem_zone_t *zone, void *ptr)
+{
+ kmem_cache_free(zone, ptr);
+}
+
+static inline void
+kmem_zone_destroy(kmem_zone_t *zone)
+{
+ if (zone)
+ kmem_cache_destroy(zone);
+}
+
+extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t);
+
+static inline void *
+kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
+{
+ return kmem_zone_alloc(zone, flags | KM_ZERO);
+}
+
+#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
deleted file mode 100644
index aba7fcf881a..00000000000
--- a/fs/xfs/linux-2.6/kmem.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
-#include "time.h"
-#include "kmem.h"
-
-#define MAX_VMALLOCS 6
-#define MAX_SLAB_SIZE 0x20000
-
-void *
-kmem_alloc(size_t size, unsigned int __nocast flags)
-{
- int retries = 0;
- gfp_t lflags = kmem_flags_convert(flags);
- void *ptr;
-
- do {
- if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
- ptr = kmalloc(size, lflags);
- else
- ptr = __vmalloc(size, lflags, PAGE_KERNEL);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
- return ptr;
- if (!(++retries % 100))
- printk(KERN_ERR "XFS: possible memory allocation "
- "deadlock in %s (mode:0x%x)\n",
- __FUNCTION__, lflags);
- blk_congestion_wait(WRITE, HZ/50);
- } while (1);
-}
-
-void *
-kmem_zalloc(size_t size, unsigned int __nocast flags)
-{
- void *ptr;
-
- ptr = kmem_alloc(size, flags);
- if (ptr)
- memset((char *)ptr, 0, (int)size);
- return ptr;
-}
-
-void
-kmem_free(void *ptr, size_t size)
-{
- if (((unsigned long)ptr < VMALLOC_START) ||
- ((unsigned long)ptr >= VMALLOC_END)) {
- kfree(ptr);
- } else {
- vfree(ptr);
- }
-}
-
-void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
- unsigned int __nocast flags)
-{
- void *new;
-
- new = kmem_alloc(newsize, flags);
- if (ptr) {
- if (new)
- memcpy(new, ptr,
- ((oldsize < newsize) ? oldsize : newsize));
- kmem_free(ptr, oldsize);
- }
- return new;
-}
-
-void *
-kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
- int retries = 0;
- gfp_t lflags = kmem_flags_convert(flags);
- void *ptr;
-
- do {
- ptr = kmem_cache_alloc(zone, lflags);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
- return ptr;
- if (!(++retries % 100))
- printk(KERN_ERR "XFS: possible memory allocation "
- "deadlock in %s (mode:0x%x)\n",
- __FUNCTION__, lflags);
- blk_congestion_wait(WRITE, HZ/50);
- } while (1);
-}
-
-void *
-kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
- void *ptr;
-
- ptr = kmem_zone_alloc(zone, flags);
- if (ptr)
- memset((char *)ptr, 0, kmem_cache_size(zone));
- return ptr;
-}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
deleted file mode 100644
index c64a29cdfff..00000000000
--- a/fs/xfs/linux-2.6/kmem.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_KMEM_H__
-#define __XFS_SUPPORT_KMEM_H__
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-
-/*
- * memory management routines
- */
-#define KM_SLEEP 0x0001u
-#define KM_NOSLEEP 0x0002u
-#define KM_NOFS 0x0004u
-#define KM_MAYFAIL 0x0008u
-
-#define kmem_zone kmem_cache
-#define kmem_zone_t struct kmem_cache
-
-typedef unsigned long xfs_pflags_t;
-
-#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO)
-#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS)
-
-#define PFLAGS_SET_NOIO() do { \
- current->flags |= PF_NOIO; \
-} while (0)
-
-#define PFLAGS_CLEAR_NOIO() do { \
- current->flags &= ~PF_NOIO; \
-} while (0)
-
-/* these could be nested, so we save state */
-#define PFLAGS_SET_FSTRANS(STATEP) do { \
- *(STATEP) = current->flags; \
- current->flags |= PF_FSTRANS; \
-} while (0)
-
-#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \
- *(STATEP) = current->flags; \
- current->flags &= ~PF_FSTRANS; \
-} while (0)
-
-/* Restore the PF_FSTRANS state to what was saved in STATEP */
-#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \
- current->flags = ((current->flags & ~PF_FSTRANS) | \
- (*(STATEP) & PF_FSTRANS)); \
-} while (0)
-
-#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \
- *(NSTATEP) = *(OSTATEP); \
-} while (0)
-
-static __inline gfp_t kmem_flags_convert(unsigned int __nocast flags)
-{
- gfp_t lflags = __GFP_NOWARN; /* we'll report problems, if need be */
-
-#ifdef DEBUG
- if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
- printk(KERN_WARNING
- "XFS: memory allocation with wrong flags (%x)\n", flags);
- BUG();
- }
-#endif
-
- if (flags & KM_NOSLEEP) {
- lflags |= GFP_ATOMIC;
- } else {
- lflags |= GFP_KERNEL;
-
- /* avoid recusive callbacks to filesystem during transactions */
- if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
- lflags &= ~__GFP_FS;
- }
-
- return lflags;
-}
-
-static __inline kmem_zone_t *
-kmem_zone_init(int size, char *zone_name)
-{
- return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL);
-}
-
-static __inline void
-kmem_zone_free(kmem_zone_t *zone, void *ptr)
-{
- kmem_cache_free(zone, ptr);
-}
-
-static __inline void
-kmem_zone_destroy(kmem_zone_t *zone)
-{
- if (zone && kmem_cache_destroy(zone))
- BUG();
-}
-
-extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
-
-extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
-extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void kmem_free(void *, size_t);
-
-typedef struct shrinker *kmem_shaker_t;
-typedef int (*kmem_shake_func_t)(int, gfp_t);
-
-static __inline kmem_shaker_t
-kmem_shake_register(kmem_shake_func_t sfunc)
-{
- return set_shrinker(DEFAULT_SEEKS, sfunc);
-}
-
-static __inline void
-kmem_shake_deregister(kmem_shaker_t shrinker)
-{
- remove_shrinker(shrinker);
-}
-
-static __inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
- return (gfp_mask & __GFP_WAIT);
-}
-
-#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
deleted file mode 100644
index 194a84490bd..00000000000
--- a/fs/xfs/linux-2.6/sema.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SEMA_H__
-#define __XFS_SUPPORT_SEMA_H__
-
-#include <linux/time.h>
-#include <linux/wait.h>
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
-
-/*
- * sema_t structure just maps to struct semaphore in Linux kernel.
- */
-
-typedef struct semaphore sema_t;
-
-#define init_sema(sp, val, c, d) sema_init(sp, val)
-#define initsema(sp, val) sema_init(sp, val)
-#define initnsema(sp, val, name) sema_init(sp, val)
-#define psema(sp, b) down(sp)
-#define vsema(sp) up(sp)
-#define valusema(sp) (atomic_read(&(sp)->count))
-#define freesema(sema)
-
-/*
- * Map cpsema (try to get the sema) to down_trylock. We need to switch
- * the return values since cpsema returns 1 (acquired) 0 (failed) and
- * down_trylock returns the reverse 0 (acquired) 1 (failed).
- */
-
-#define cpsema(sp) (down_trylock(sp) ? 0 : 1)
-
-/*
- * Didn't do cvsema(sp). Not sure how to map this to up/down/...
- * It does a vsema if the values is < 0 other wise nothing.
- */
-
-#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
deleted file mode 100644
index 50a6191178f..00000000000
--- a/fs/xfs/linux-2.6/spin.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SPIN_H__
-#define __XFS_SUPPORT_SPIN_H__
-
-#include <linux/sched.h> /* preempt needs this */
-#include <linux/spinlock.h>
-
-/*
- * Map lock_t from IRIX to Linux spinlocks.
- *
- * We do not make use of lock_t from interrupt context, so we do not
- * have to worry about disabling interrupts at all (unlike IRIX).
- */
-
-typedef spinlock_t lock_t;
-
-#define SPLDECL(s) unsigned long s
-#ifndef DEFINE_SPINLOCK
-#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED
-#endif
-
-#define spinlock_init(lock, name) spin_lock_init(lock)
-#define spinlock_destroy(lock)
-#define mutex_spinlock(lock) ({ spin_lock(lock); 0; })
-#define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0)
-#define nested_spinlock(lock) spin_lock(lock)
-#define nested_spinunlock(lock) spin_unlock(lock)
-
-#endif /* __XFS_SUPPORT_SPIN_H__ */
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 9a8ad481b00..00000000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SV_H__
-#define __XFS_SUPPORT_SV_H__
-
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-
-/*
- * Synchronisation variables.
- *
- * (Parameters "pri", "svf" and "rts" are not implemented)
- */
-
-typedef struct sv_s {
- wait_queue_head_t waiters;
-} sv_t;
-
-#define SV_FIFO 0x0 /* sv_t is FIFO type */
-#define SV_LIFO 0x2 /* sv_t is LIFO type */
-#define SV_PRIO 0x4 /* sv_t is PRIO type */
-#define SV_KEYED 0x6 /* sv_t is KEYED type */
-#define SV_DEFAULT SV_FIFO
-
-
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
- unsigned long timeout)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue_exclusive(&sv->waiters, &wait);
- __set_current_state(state);
- spin_unlock(lock);
-
- schedule_timeout(timeout);
-
- remove_wait_queue(&sv->waiters, &wait);
-}
-
-#define init_sv(sv,type,name,flag) \
- init_waitqueue_head(&(sv)->waiters)
-#define sv_init(sv,flag,name) \
- init_waitqueue_head(&(sv)->waiters)
-#define sv_destroy(sv) \
- /*NOTHING*/
-#define sv_wait(sv, pri, lock, s) \
- _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
-#define sv_wait_sig(sv, pri, lock, s) \
- _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
-#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
- _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
-#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
- _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
-#define sv_signal(sv) \
- wake_up(&(sv)->waiters)
-#define sv_broadcast(sv) \
- wake_up_all(&(sv)->waiters)
-
-#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
deleted file mode 100644
index 9892268e300..00000000000
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ /dev/null
@@ -1,1466 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_trans.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_iomap.h"
-#include <linux/mpage.h>
-#include <linux/pagevec.h>
-#include <linux/writeback.h>
-
-STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
-
-#if defined(XFS_RW_TRACE)
-void
-xfs_page_trace(
- int tag,
- struct inode *inode,
- struct page *page,
- int mask)
-{
- xfs_inode_t *ip;
- vnode_t *vp = LINVFS_GET_VP(inode);
- loff_t isize = i_size_read(inode);
- loff_t offset = page_offset(page);
- int delalloc = -1, unmapped = -1, unwritten = -1;
-
- if (page_has_buffers(page))
- xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-
- ip = xfs_vtoi(vp);
- if (!ip->i_rwtrace)
- return;
-
- ktrace_enter(ip->i_rwtrace,
- (void *)((unsigned long)tag),
- (void *)ip,
- (void *)inode,
- (void *)page,
- (void *)((unsigned long)mask),
- (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
- (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
- (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
- (void *)((unsigned long)(isize & 0xffffffff)),
- (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
- (void *)((unsigned long)(offset & 0xffffffff)),
- (void *)((unsigned long)delalloc),
- (void *)((unsigned long)unmapped),
- (void *)((unsigned long)unwritten),
- (void *)NULL,
- (void *)NULL);
-}
-#else
-#define xfs_page_trace(tag, inode, page, mask)
-#endif
-
-/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend.
- */
-STATIC void
-xfs_finish_ioend(
- xfs_ioend_t *ioend)
-{
- if (atomic_dec_and_test(&ioend->io_remaining))
- queue_work(xfsdatad_workqueue, &ioend->io_work);
-}
-
-/*
- * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
- * release holds on the inode and bio, and finally free
- * up memory. Do not use the ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
- xfs_ioend_t *ioend)
-{
- struct buffer_head *bh, *next;
-
- for (bh = ioend->io_buffer_head; bh; bh = next) {
- next = bh->b_private;
- bh->b_end_io(bh, ioend->io_uptodate);
- }
-
- vn_iowake(ioend->io_vnode);
- mempool_free(ioend, xfs_ioend_pool);
-}
-
-/*
- * Buffered IO write completion for delayed allocate extents.
- * TODO: Update ondisk isize now that we know the file data
- * has been flushed (i.e. the notorious "NULL file" problem).
- */
-STATIC void
-xfs_end_bio_delalloc(
- void *data)
-{
- xfs_ioend_t *ioend = data;
-
- xfs_destroy_ioend(ioend);
-}
-
-/*
- * Buffered IO write completion for regular, written extents.
- */
-STATIC void
-xfs_end_bio_written(
- void *data)
-{
- xfs_ioend_t *ioend = data;
-
- xfs_destroy_ioend(ioend);
-}
-
-/*
- * IO write completion for unwritten extents.
- *
- * Issue transactions to convert a buffer range from unwritten
- * to written extents.
- */
-STATIC void
-xfs_end_bio_unwritten(
- void *data)
-{
- xfs_ioend_t *ioend = data;
- vnode_t *vp = ioend->io_vnode;
- xfs_off_t offset = ioend->io_offset;
- size_t size = ioend->io_size;
- int error;
-
- if (ioend->io_uptodate)
- VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
- xfs_destroy_ioend(ioend);
-}
-
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
- struct inode *inode,
- unsigned int type)
-{
- xfs_ioend_t *ioend;
-
- ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-
- /*
- * Set the count to 1 initially, which will prevent an I/O
- * completion callback from happening before we have started
- * all the I/O from calling the completion routine too early.
- */
- atomic_set(&ioend->io_remaining, 1);
- ioend->io_uptodate = 1; /* cleared if any I/O fails */
- ioend->io_list = NULL;
- ioend->io_type = type;
- ioend->io_vnode = LINVFS_GET_VP(inode);
- ioend->io_buffer_head = NULL;
- ioend->io_buffer_tail = NULL;
- atomic_inc(&ioend->io_vnode->v_iocount);
- ioend->io_offset = 0;
- ioend->io_size = 0;
-
- if (type == IOMAP_UNWRITTEN)
- INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
- else if (type == IOMAP_DELAY)
- INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
- else
- INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
-
- return ioend;
-}
-
-STATIC int
-xfs_map_blocks(
- struct inode *inode,
- loff_t offset,
- ssize_t count,
- xfs_iomap_t *mapp,
- int flags)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error, nmaps = 1;
-
- VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
- if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
- VMODIFY(vp);
- return -error;
-}
-
-STATIC inline int
-xfs_iomap_valid(
- xfs_iomap_t *iomapp,
- loff_t offset)
-{
- return offset >= iomapp->iomap_offset &&
- offset < iomapp->iomap_offset + iomapp->iomap_bsize;
-}
-
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC int
-xfs_end_bio(
- struct bio *bio,
- unsigned int bytes_done,
- int error)
-{
- xfs_ioend_t *ioend = bio->bi_private;
-
- if (bio->bi_size)
- return 1;
-
- ASSERT(ioend);
- ASSERT(atomic_read(&bio->bi_cnt) >= 1);
-
- /* Toss bio and pass work off to an xfsdatad thread */
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- ioend->io_uptodate = 0;
- bio->bi_private = NULL;
- bio->bi_end_io = NULL;
-
- bio_put(bio);
- xfs_finish_ioend(ioend);
- return 0;
-}
-
-STATIC void
-xfs_submit_ioend_bio(
- xfs_ioend_t *ioend,
- struct bio *bio)
-{
- atomic_inc(&ioend->io_remaining);
-
- bio->bi_private = ioend;
- bio->bi_end_io = xfs_end_bio;
-
- submit_bio(WRITE, bio);
- ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
- bio_put(bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
- struct buffer_head *bh)
-{
- struct bio *bio;
- int nvecs = bio_get_nr_vecs(bh->b_bdev);
-
- do {
- bio = bio_alloc(GFP_NOIO, nvecs);
- nvecs >>= 1;
- } while (!bio);
-
- ASSERT(bio->bi_private == NULL);
- bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
- bio_get(bio);
- return bio;
-}
-
-STATIC void
-xfs_start_buffer_writeback(
- struct buffer_head *bh)
-{
- ASSERT(buffer_mapped(bh));
- ASSERT(buffer_locked(bh));
- ASSERT(!buffer_delay(bh));
- ASSERT(!buffer_unwritten(bh));
-
- mark_buffer_async_write(bh);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
-}
-
-STATIC void
-xfs_start_page_writeback(
- struct page *page,
- struct writeback_control *wbc,
- int clear_dirty,
- int buffers)
-{
- ASSERT(PageLocked(page));
- ASSERT(!PageWriteback(page));
- set_page_writeback(page);
- if (clear_dirty)
- clear_page_dirty(page);
- unlock_page(page);
- if (!buffers) {
- end_page_writeback(page);
- wbc->pages_skipped++; /* We didn't write this page */
- }
-}
-
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
-{
- return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-}
-
-/*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * bufferheads, and then the second one submit them for I/O.
- */
-STATIC void
-xfs_submit_ioend(
- xfs_ioend_t *ioend)
-{
- xfs_ioend_t *head = ioend;
- xfs_ioend_t *next;
- struct buffer_head *bh;
- struct bio *bio;
- sector_t lastblock = 0;
-
- /* Pass 1 - start writeback */
- do {
- next = ioend->io_list;
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
- xfs_start_buffer_writeback(bh);
- }
- } while ((ioend = next) != NULL);
-
- /* Pass 2 - submit I/O */
- ioend = head;
- do {
- next = ioend->io_list;
- bio = NULL;
-
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
- if (!bio) {
- retry:
- bio = xfs_alloc_ioend_bio(bh);
- } else if (bh->b_blocknr != lastblock + 1) {
- xfs_submit_ioend_bio(ioend, bio);
- goto retry;
- }
-
- if (bio_add_buffer(bio, bh) != bh->b_size) {
- xfs_submit_ioend_bio(ioend, bio);
- goto retry;
- }
-
- lastblock = bh->b_blocknr;
- }
- if (bio)
- xfs_submit_ioend_bio(ioend, bio);
- xfs_finish_ioend(ioend);
- } while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too. Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
- xfs_ioend_t *ioend)
-{
- xfs_ioend_t *next;
- struct buffer_head *bh, *next_bh;
-
- do {
- next = ioend->io_list;
- bh = ioend->io_buffer_head;
- do {
- next_bh = bh->b_private;
- clear_buffer_async_write(bh);
- unlock_buffer(bh);
- } while ((bh = next_bh) != NULL);
-
- vn_iowake(ioend->io_vnode);
- mempool_free(ioend, xfs_ioend_pool);
- } while ((ioend = next) != NULL);
-}
-
-/*
- * Test to see if we've been building up a completion structure for
- * earlier buffers -- if so, we try to append to this ioend if we
- * can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
- */
-STATIC void
-xfs_add_to_ioend(
- struct inode *inode,
- struct buffer_head *bh,
- xfs_off_t offset,
- unsigned int type,
- xfs_ioend_t **result,
- int need_ioend)
-{
- xfs_ioend_t *ioend = *result;
-
- if (!ioend || need_ioend || type != ioend->io_type) {
- xfs_ioend_t *previous = *result;
-
- ioend = xfs_alloc_ioend(inode, type);
- ioend->io_offset = offset;
- ioend->io_buffer_head = bh;
- ioend->io_buffer_tail = bh;
- if (previous)
- previous->io_list = ioend;
- *result = ioend;
- } else {
- ioend->io_buffer_tail->b_private = bh;
- ioend->io_buffer_tail = bh;
- }
-
- bh->b_private = NULL;
- ioend->io_size += bh->b_size;
-}
-
-STATIC void
-xfs_map_at_offset(
- struct buffer_head *bh,
- loff_t offset,
- int block_bits,
- xfs_iomap_t *iomapp)
-{
- xfs_daddr_t bn;
- int sector_shift;
-
- ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
- ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
- ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
-
- sector_shift = block_bits - BBSHIFT;
- bn = (iomapp->iomap_bn >> sector_shift) +
- ((offset - iomapp->iomap_offset) >> block_bits);
-
- ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
- ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
-
- lock_buffer(bh);
- bh->b_blocknr = bn;
- bh->b_bdev = iomapp->iomap_target->bt_bdev;
- set_buffer_mapped(bh);
- clear_buffer_delay(bh);
- clear_buffer_unwritten(bh);
-}
-
-/*
- * Look for a page at index that is suitable for clustering.
- */
-STATIC unsigned int
-xfs_probe_page(
- struct page *page,
- unsigned int pg_offset,
- int mapped)
-{
- int ret = 0;
-
- if (PageWriteback(page))
- return 0;
-
- if (page->mapping && PageDirty(page)) {
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
-
- bh = head = page_buffers(page);
- do {
- if (!buffer_uptodate(bh))
- break;
- if (mapped != buffer_mapped(bh))
- break;
- ret += bh->b_size;
- if (ret >= pg_offset)
- break;
- } while ((bh = bh->b_this_page) != head);
- } else
- ret = mapped ? 0 : PAGE_CACHE_SIZE;
- }
-
- return ret;
-}
-
-STATIC size_t
-xfs_probe_cluster(
- struct inode *inode,
- struct page *startpage,
- struct buffer_head *bh,
- struct buffer_head *head,
- int mapped)
-{
- struct pagevec pvec;
- pgoff_t tindex, tlast, tloff;
- size_t total = 0;
- int done = 0, i;
-
- /* First sum forwards in this page */
- do {
- if (mapped != buffer_mapped(bh))
- return total;
- total += bh->b_size;
- } while ((bh = bh->b_this_page) != head);
-
- /* if we reached the end of the page, sum forwards in following pages */
- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
- tindex = startpage->index + 1;
-
- /* Prune this back to avoid pathological behavior */
- tloff = min(tlast, startpage->index + 64);
-
- pagevec_init(&pvec, 0);
- while (!done && tindex <= tloff) {
- unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
- if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- size_t pg_offset, len = 0;
-
- if (tindex == tlast) {
- pg_offset =
- i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
- if (!pg_offset) {
- done = 1;
- break;
- }
- } else
- pg_offset = PAGE_CACHE_SIZE;
-
- if (page->index == tindex && !TestSetPageLocked(page)) {
- len = xfs_probe_page(page, pg_offset, mapped);
- unlock_page(page);
- }
-
- if (!len) {
- done = 1;
- break;
- }
-
- total += len;
- tindex++;
- }
-
- pagevec_release(&pvec);
- cond_resched();
- }
-
- return total;
-}
-
-/*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
- */
-STATIC int
-xfs_is_delayed_page(
- struct page *page,
- unsigned int type)
-{
- if (PageWriteback(page))
- return 0;
-
- if (page->mapping && page_has_buffers(page)) {
- struct buffer_head *bh, *head;
- int acceptable = 0;
-
- bh = head = page_buffers(page);
- do {
- if (buffer_unwritten(bh))
- acceptable = (type == IOMAP_UNWRITTEN);
- else if (buffer_delay(bh))
- acceptable = (type == IOMAP_DELAY);
- else if (buffer_mapped(bh))
- acceptable = (type == 0);
- else
- break;
- } while ((bh = bh->b_this_page) != head);
-
- if (acceptable)
- return 1;
- }
-
- return 0;
-}
-
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
- struct inode *inode,
- struct page *page,
- loff_t tindex,
- xfs_iomap_t *mp,
- xfs_ioend_t **ioendp,
- struct writeback_control *wbc,
- int startio,
- int all_bh)
-{
- struct buffer_head *bh, *head;
- xfs_off_t end_offset;
- unsigned long p_offset;
- unsigned int type;
- int bbits = inode->i_blkbits;
- int len, page_dirty;
- int count = 0, done = 0, uptodate = 1;
- xfs_off_t offset = page_offset(page);
-
- if (page->index != tindex)
- goto fail;
- if (TestSetPageLocked(page))
- goto fail;
- if (PageWriteback(page))
- goto fail_unlock_page;
- if (page->mapping != inode->i_mapping)
- goto fail_unlock_page;
- if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
- goto fail_unlock_page;
-
- /*
- * page_dirty is initially a count of buffers on the page before
- * EOF and is decrememted as we move each into a cleanable state.
- *
- * Derivation:
- *
- * End offset is the highest offset that this page should represent.
- * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
- * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
- * hence give us the correct page_dirty count. On any other page,
- * it will be zero and in that case we need page_dirty to be the
- * count of buffers on the page.
- */
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
- i_size_read(inode));
-
- len = 1 << inode->i_blkbits;
- p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
- PAGE_CACHE_SIZE);
- p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
- page_dirty = p_offset / len;
-
- bh = head = page_buffers(page);
- do {
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh))) {
- done = 1;
- continue;
- }
-
- if (buffer_unwritten(bh) || buffer_delay(bh)) {
- if (buffer_unwritten(bh))
- type = IOMAP_UNWRITTEN;
- else
- type = IOMAP_DELAY;
-
- if (!xfs_iomap_valid(mp, offset)) {
- done = 1;
- continue;
- }
-
- ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
- ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
-
- xfs_map_at_offset(bh, offset, bbits, mp);
- if (startio) {
- xfs_add_to_ioend(inode, bh, offset,
- type, ioendp, done);
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
- page_dirty--;
- count++;
- } else {
- type = 0;
- if (buffer_mapped(bh) && all_bh && startio) {
- lock_buffer(bh);
- xfs_add_to_ioend(inode, bh, offset,
- type, ioendp, done);
- count++;
- page_dirty--;
- } else {
- done = 1;
- }
- }
- } while (offset += len, (bh = bh->b_this_page) != head);
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- if (startio) {
- if (count) {
- struct backing_dev_info *bdi;
-
- bdi = inode->i_mapping->backing_dev_info;
- if (bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
- done = 1;
- } else if (--wbc->nr_to_write <= 0) {
- done = 1;
- }
- }
- xfs_start_page_writeback(page, wbc, !page_dirty, count);
- }
-
- return done;
- fail_unlock_page:
- unlock_page(page);
- fail:
- return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
- struct inode *inode,
- pgoff_t tindex,
- xfs_iomap_t *iomapp,
- xfs_ioend_t **ioendp,
- struct writeback_control *wbc,
- int startio,
- int all_bh,
- pgoff_t tlast)
-{
- struct pagevec pvec;
- int done = 0, i;
-
- pagevec_init(&pvec, 0);
- while (!done && tindex <= tlast) {
- unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
- if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- iomapp, ioendp, wbc, startio, all_bh);
- if (done)
- break;
- }
-
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
-/*
- * Calling this without startio set means we are being asked to make a dirty
- * page ready for freeing it's buffers. When called with startio set then
- * we are coming from writepage.
- *
- * When called with startio set it is important that we write the WHOLE
- * page if possible.
- * The bh->b_state's cannot know if any of the blocks or which block for
- * that matter are dirty due to mmap writes, and therefore bh uptodate is
- * only vaild if the page itself isn't completely uptodate. Some layers
- * may clear the page dirty flag prior to calling write page, under the
- * assumption the entire page will be written out; by not writing out the
- * whole page the page can be reused before all valid dirty data is
- * written out. Note: in the case of a page that has been dirty'd by
- * mapwrite and but partially setup by block_prepare_write the
- * bh->b_states's will not agree and only ones setup by BPW/BCW will have
- * valid state, thus the whole page must be written out thing.
- */
-
-STATIC int
-xfs_page_state_convert(
- struct inode *inode,
- struct page *page,
- struct writeback_control *wbc,
- int startio,
- int unmapped) /* also implies page uptodate */
-{
- struct buffer_head *bh, *head;
- xfs_iomap_t iomap;
- xfs_ioend_t *ioend = NULL, *iohead = NULL;
- loff_t offset;
- unsigned long p_offset = 0;
- unsigned int type;
- __uint64_t end_offset;
- pgoff_t end_index, last_index, tlast;
- ssize_t size, len;
- int flags, err, iomap_valid = 0, uptodate = 1;
- int page_dirty, count = 0, trylock_flag = 0;
- int all_bh = unmapped;
-
- /* wait for other IO threads? */
- if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
- trylock_flag |= BMAPI_TRYLOCK;
-
- /* Is this page beyond the end of the file? */
- offset = i_size_read(inode);
- end_index = offset >> PAGE_CACHE_SHIFT;
- last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
- if (page->index >= end_index) {
- if ((page->index >= end_index + 1) ||
- !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
- if (startio)
- unlock_page(page);
- return 0;
- }
- }
-
- /*
- * page_dirty is initially a count of buffers on the page before
- * EOF and is decrememted as we move each into a cleanable state.
- *
- * Derivation:
- *
- * End offset is the highest offset that this page should represent.
- * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
- * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
- * hence give us the correct page_dirty count. On any other page,
- * it will be zero and in that case we need page_dirty to be the
- * count of buffers on the page.
- */
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
- len = 1 << inode->i_blkbits;
- p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
- PAGE_CACHE_SIZE);
- p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
- page_dirty = p_offset / len;
-
- bh = head = page_buffers(page);
- offset = page_offset(page);
- flags = -1;
- type = 0;
-
- /* TODO: cleanup count and page_dirty */
-
- do {
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
- /*
- * the iomap is actually still valid, but the ioend
- * isn't. shouldn't happen too often.
- */
- iomap_valid = 0;
- continue;
- }
-
- if (iomap_valid)
- iomap_valid = xfs_iomap_valid(&iomap, offset);
-
- /*
- * First case, map an unwritten extent and prepare for
- * extent state conversion transaction on completion.
- *
- * Second case, allocate space for a delalloc buffer.
- * We can return EAGAIN here in the release page case.
- *
- * Third case, an unmapped buffer was found, and we are
- * in a path where we need to write the whole page out.
- */
- if (buffer_unwritten(bh) || buffer_delay(bh) ||
- ((buffer_uptodate(bh) || PageUptodate(page)) &&
- !buffer_mapped(bh) && (unmapped || startio))) {
- /*
- * Make sure we don't use a read-only iomap
- */
- if (flags == BMAPI_READ)
- iomap_valid = 0;
-
- if (buffer_unwritten(bh)) {
- type = IOMAP_UNWRITTEN;
- flags = BMAPI_WRITE|BMAPI_IGNSTATE;
- } else if (buffer_delay(bh)) {
- type = IOMAP_DELAY;
- flags = BMAPI_ALLOCATE;
- if (!startio)
- flags |= trylock_flag;
- } else {
- type = IOMAP_NEW;
- flags = BMAPI_WRITE|BMAPI_MMAP;
- }
-
- if (!iomap_valid) {
- if (type == IOMAP_NEW) {
- size = xfs_probe_cluster(inode,
- page, bh, head, 0);
- } else {
- size = len;
- }
-
- err = xfs_map_blocks(inode, offset, size,
- &iomap, flags);
- if (err)
- goto error;
- iomap_valid = xfs_iomap_valid(&iomap, offset);
- }
- if (iomap_valid) {
- xfs_map_at_offset(bh, offset,
- inode->i_blkbits, &iomap);
- if (startio) {
- xfs_add_to_ioend(inode, bh, offset,
- type, &ioend,
- !iomap_valid);
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
- page_dirty--;
- count++;
- }
- } else if (buffer_uptodate(bh) && startio) {
- /*
- * we got here because the buffer is already mapped.
- * That means it must already have extents allocated
- * underneath it. Map the extent by reading it.
- */
- if (!iomap_valid || type != 0) {
- flags = BMAPI_READ;
- size = xfs_probe_cluster(inode, page, bh,
- head, 1);
- err = xfs_map_blocks(inode, offset, size,
- &iomap, flags);
- if (err)
- goto error;
- iomap_valid = xfs_iomap_valid(&iomap, offset);
- }
-
- type = 0;
- if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
- ASSERT(buffer_mapped(bh));
- if (iomap_valid)
- all_bh = 1;
- xfs_add_to_ioend(inode, bh, offset, type,
- &ioend, !iomap_valid);
- page_dirty--;
- count++;
- } else {
- iomap_valid = 0;
- }
- } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
- (unmapped || startio)) {
- iomap_valid = 0;
- }
-
- if (!iohead)
- iohead = ioend;
-
- } while (offset += len, ((bh = bh->b_this_page) != head));
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- if (startio)
- xfs_start_page_writeback(page, wbc, 1, count);
-
- if (ioend && iomap_valid) {
- offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
- PAGE_CACHE_SHIFT;
- tlast = min_t(pgoff_t, offset, last_index);
- xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
- wbc, startio, all_bh, tlast);
- }
-
- if (iohead)
- xfs_submit_ioend(iohead);
-
- return page_dirty;
-
-error:
- if (iohead)
- xfs_cancel_ioend(iohead);
-
- /*
- * If it's delalloc and we have nowhere to put it,
- * throw it away, unless the lower layers told
- * us to try again.
- */
- if (err != -EAGAIN) {
- if (!unmapped)
- block_invalidatepage(page, 0);
- ClearPageUptodate(page);
- }
- return err;
-}
-
-STATIC int
-__linvfs_get_block(
- struct inode *inode,
- sector_t iblock,
- unsigned long blocks,
- struct buffer_head *bh_result,
- int create,
- int direct,
- bmapi_flags_t flags)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- xfs_iomap_t iomap;
- xfs_off_t offset;
- ssize_t size;
- int retpbbm = 1;
- int error;
-
- offset = (xfs_off_t)iblock << inode->i_blkbits;
- if (blocks)
- size = (ssize_t) min_t(xfs_off_t, LONG_MAX,
- (xfs_off_t)blocks << inode->i_blkbits);
- else
- size = 1 << inode->i_blkbits;
-
- VOP_BMAP(vp, offset, size,
- create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
- if (error)
- return -error;
-
- if (retpbbm == 0)
- return 0;
-
- if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
- xfs_daddr_t bn;
- xfs_off_t delta;
-
- /* For unwritten extents do not report a disk address on
- * the read case (treat as if we're reading into a hole).
- */
- if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
- delta = offset - iomap.iomap_offset;
- delta >>= inode->i_blkbits;
-
- bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT);
- bn += delta;
- BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME));
- bh_result->b_blocknr = bn;
- set_buffer_mapped(bh_result);
- }
- if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
- if (direct)
- bh_result->b_private = inode;
- set_buffer_unwritten(bh_result);
- set_buffer_delay(bh_result);
- }
- }
-
- /* If this is a realtime file, data might be on a new device */
- bh_result->b_bdev = iomap.iomap_target->bt_bdev;
-
- /* If we previously allocated a block out beyond eof and
- * we are now coming back to use it then we will need to
- * flag it as new even if it has a disk address.
- */
- if (create &&
- ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
- (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW)))
- set_buffer_new(bh_result);
-
- if (iomap.iomap_flags & IOMAP_DELAY) {
- BUG_ON(direct);
- if (create) {
- set_buffer_uptodate(bh_result);
- set_buffer_mapped(bh_result);
- set_buffer_delay(bh_result);
- }
- }
-
- if (blocks) {
- ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
- offset = min_t(xfs_off_t,
- iomap.iomap_bsize - iomap.iomap_delta,
- (xfs_off_t)blocks << inode->i_blkbits);
- bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset);
- }
-
- return 0;
-}
-
-int
-linvfs_get_block(
- struct inode *inode,
- sector_t iblock,
- struct buffer_head *bh_result,
- int create)
-{
- return __linvfs_get_block(inode, iblock, 0, bh_result,
- create, 0, BMAPI_WRITE);
-}
-
-STATIC int
-linvfs_get_blocks_direct(
- struct inode *inode,
- sector_t iblock,
- unsigned long max_blocks,
- struct buffer_head *bh_result,
- int create)
-{
- return __linvfs_get_block(inode, iblock, max_blocks, bh_result,
- create, 1, BMAPI_WRITE|BMAPI_DIRECT);
-}
-
-STATIC void
-linvfs_end_io_direct(
- struct kiocb *iocb,
- loff_t offset,
- ssize_t size,
- void *private)
-{
- xfs_ioend_t *ioend = iocb->private;
-
- /*
- * Non-NULL private data means we need to issue a transaction to
- * convert a range from unwritten to written extents. This needs
- * to happen from process contect but aio+dio I/O completion
- * happens from irq context so we need to defer it to a workqueue.
- * This is not nessecary for synchronous direct I/O, but we do
- * it anyway to keep the code uniform and simpler.
- *
- * The core direct I/O code might be changed to always call the
- * completion handler in the future, in which case all this can
- * go away.
- */
- if (private && size > 0) {
- ioend->io_offset = offset;
- ioend->io_size = size;
- xfs_finish_ioend(ioend);
- } else {
- ASSERT(size >= 0);
- xfs_destroy_ioend(ioend);
- }
-
- /*
- * blockdev_direct_IO can return an error even afer the I/O
- * completion handler was called. Thus we need to protect
- * against double-freeing.
- */
- iocb->private = NULL;
-}
-
-STATIC ssize_t
-linvfs_direct_IO(
- int rw,
- struct kiocb *iocb,
- const struct iovec *iov,
- loff_t offset,
- unsigned long nr_segs)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = LINVFS_GET_VP(inode);
- xfs_iomap_t iomap;
- int maps = 1;
- int error;
- ssize_t ret;
-
- VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
- if (error)
- return -error;
-
- iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-
- ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
- iomap.iomap_target->bt_bdev,
- iov, offset, nr_segs,
- linvfs_get_blocks_direct,
- linvfs_end_io_direct);
-
- if (unlikely(ret <= 0 && iocb->private))
- xfs_destroy_ioend(iocb->private);
- return ret;
-}
-
-
-STATIC sector_t
-linvfs_bmap(
- struct address_space *mapping,
- sector_t block)
-{
- struct inode *inode = (struct inode *)mapping->host;
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error;
-
- vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address);
-
- VOP_RWLOCK(vp, VRWLOCK_READ);
- VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
- VOP_RWUNLOCK(vp, VRWLOCK_READ);
- return generic_block_bmap(mapping, block, linvfs_get_block);
-}
-
-STATIC int
-linvfs_readpage(
- struct file *unused,
- struct page *page)
-{
- return mpage_readpage(page, linvfs_get_block);
-}
-
-STATIC int
-linvfs_readpages(
- struct file *unused,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned nr_pages)
-{
- return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
-}
-
-STATIC void
-xfs_count_page_state(
- struct page *page,
- int *delalloc,
- int *unmapped,
- int *unwritten)
-{
- struct buffer_head *bh, *head;
-
- *delalloc = *unmapped = *unwritten = 0;
-
- bh = head = page_buffers(page);
- do {
- if (buffer_uptodate(bh) && !buffer_mapped(bh))
- (*unmapped) = 1;
- else if (buffer_unwritten(bh) && !buffer_delay(bh))
- clear_buffer_unwritten(bh);
- else if (buffer_unwritten(bh))
- (*unwritten) = 1;
- else if (buffer_delay(bh))
- (*delalloc) = 1;
- } while ((bh = bh->b_this_page) != head);
-}
-
-
-/*
- * writepage: Called from one of two places:
- *
- * 1. we are flushing a delalloc buffer head.
- *
- * 2. we are writing out a dirty page. Typically the page dirty
- * state is cleared before we get here. In this case is it
- * conceivable we have no buffer heads.
- *
- * For delalloc space on the page we need to allocate space and
- * flush it. For unmapped buffer heads on the page we should
- * allocate space if the page is uptodate. For any other dirty
- * buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush
- * the page, we have to check the process flags first, if we
- * are already in a transaction or disk I/O during allocations
- * is off, we need to fail the writepage and redirty the page.
- */
-
-STATIC int
-linvfs_writepage(
- struct page *page,
- struct writeback_control *wbc)
-{
- int error;
- int need_trans;
- int delalloc, unmapped, unwritten;
- struct inode *inode = page->mapping->host;
-
- xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
-
- /*
- * We need a transaction if:
- * 1. There are delalloc buffers on the page
- * 2. The page is uptodate and we have unmapped buffers
- * 3. The page is uptodate and we have no buffers
- * 4. There are unwritten buffers on the page
- */
-
- if (!page_has_buffers(page)) {
- unmapped = 1;
- need_trans = 1;
- } else {
- xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- if (!PageUptodate(page))
- unmapped = 0;
- need_trans = delalloc + unmapped + unwritten;
- }
-
- /*
- * If we need a transaction and the process flags say
- * we are already in a transaction, or no IO is allowed
- * then mark the page dirty again and leave the page
- * as is.
- */
- if (PFLAGS_TEST_FSTRANS() && need_trans)
- goto out_fail;
-
- /*
- * Delay hooking up buffer heads until we have
- * made our go/no-go decision.
- */
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
- /*
- * Convert delayed allocate, unwritten or unmapped space
- * to real space and flush out to disk.
- */
- error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
- if (error == -EAGAIN)
- goto out_fail;
- if (unlikely(error < 0))
- goto out_unlock;
-
- return 0;
-
-out_fail:
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-out_unlock:
- unlock_page(page);
- return error;
-}
-
-STATIC int
-linvfs_invalidate_page(
- struct page *page,
- unsigned long offset)
-{
- xfs_page_trace(XFS_INVALIDPAGE_ENTER,
- page->mapping->host, page, offset);
- return block_invalidatepage(page, offset);
-}
-
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. Possibly the page is already clean. We always
- * have buffer heads in this call.
- *
- * Returns 0 if the page is ok to release, 1 otherwise.
- *
- * Possible scenarios are:
- *
- * 1. We are being called to release a page which has been written
- * to via regular I/O. buffer heads will be dirty and possibly
- * delalloc. If no delalloc buffer heads in this case then we
- * can just return zero.
- *
- * 2. We are called to release a page which has been written via
- * mmap, all we need to do is ensure there is no delalloc
- * state in the buffer heads, if not we can let the caller
- * free them and we should come back later via writepage.
- */
-STATIC int
-linvfs_release_page(
- struct page *page,
- gfp_t gfp_mask)
-{
- struct inode *inode = page->mapping->host;
- int dirty, delalloc, unmapped, unwritten;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 1,
- };
-
- xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
-
- xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- if (!delalloc && !unwritten)
- goto free_buffers;
-
- if (!(gfp_mask & __GFP_FS))
- return 0;
-
- /* If we are already inside a transaction or the thread cannot
- * do I/O, we cannot release this page.
- */
- if (PFLAGS_TEST_FSTRANS())
- return 0;
-
- /*
- * Convert delalloc space to real space, do not flush the
- * data out to disk, that will be done by the caller.
- * Never need to allocate space here - we will always
- * come back to writepage in that case.
- */
- dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
- if (dirty == 0 && !unwritten)
- goto free_buffers;
- return 0;
-
-free_buffers:
- return try_to_free_buffers(page);
-}
-
-STATIC int
-linvfs_prepare_write(
- struct file *file,
- struct page *page,
- unsigned int from,
- unsigned int to)
-{
- return block_prepare_write(page, from, to, linvfs_get_block);
-}
-
-struct address_space_operations linvfs_aops = {
- .readpage = linvfs_readpage,
- .readpages = linvfs_readpages,
- .writepage = linvfs_writepage,
- .sync_page = block_sync_page,
- .releasepage = linvfs_release_page,
- .invalidatepage = linvfs_invalidate_page,
- .prepare_write = linvfs_prepare_write,
- .commit_write = generic_commit_write,
- .bmap = linvfs_bmap,
- .direct_IO = linvfs_direct_IO,
- .migratepage = buffer_migrate_page,
-};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
deleted file mode 100644
index bfb4f2917bb..00000000000
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ /dev/null
@@ -1,1855 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include "xfs_linux.h"
-
-STATIC kmem_zone_t *xfs_buf_zone;
-STATIC kmem_shaker_t xfs_buf_shake;
-STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
-STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-
-STATIC struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-
-#ifdef XFS_BUF_TRACE
-void
-xfs_buf_trace(
- xfs_buf_t *bp,
- char *id,
- void *data,
- void *ra)
-{
- ktrace_enter(xfs_buf_trace_buf,
- bp, id,
- (void *)(unsigned long)bp->b_flags,
- (void *)(unsigned long)bp->b_hold.counter,
- (void *)(unsigned long)bp->b_sema.count.counter,
- (void *)current,
- data, ra,
- (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
- (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
- (void *)(unsigned long)bp->b_buffer_length,
- NULL, NULL, NULL, NULL, NULL);
-}
-ktrace_t *xfs_buf_trace_buf;
-#define XFS_BUF_TRACE_SIZE 4096
-#define XB_TRACE(bp, id, data) \
- xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
-#else
-#define XB_TRACE(bp, id, data) do { } while (0)
-#endif
-
-#ifdef XFS_BUF_LOCK_TRACKING
-# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
-# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
-# define XB_GET_OWNER(bp) ((bp)->b_last_holder)
-#else
-# define XB_SET_OWNER(bp) do { } while (0)
-# define XB_CLEAR_OWNER(bp) do { } while (0)
-# define XB_GET_OWNER(bp) do { } while (0)
-#endif
-
-#define xb_to_gfp(flags) \
- ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
- ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-
-#define xb_to_km(flags) \
- (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-
-#define xfs_buf_allocate(flags) \
- kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define xfs_buf_deallocate(bp) \
- kmem_zone_free(xfs_buf_zone, (bp));
-
-/*
- * Page Region interfaces.
- *
- * For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
- *
- * Each such region is "bytes per page / bits per long" bytes long.
- *
- * NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
- */
-#if (BITS_PER_LONG == 32)
-#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
-#elif (BITS_PER_LONG == 64)
-#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
-#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
-#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
-
-STATIC unsigned long
-page_region_mask(
- size_t offset,
- size_t length)
-{
- unsigned long mask;
- int first, final;
-
- first = BTOPR(offset);
- final = BTOPRT(offset + length - 1);
- first = min(first, final);
-
- mask = ~0UL;
- mask <<= BITS_PER_LONG - (final - first);
- mask >>= BITS_PER_LONG - (final);
-
- ASSERT(offset + length <= PAGE_CACHE_SIZE);
- ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
-
- return mask;
-}
-
-STATIC inline void
-set_page_region(
- struct page *page,
- size_t offset,
- size_t length)
-{
- set_page_private(page,
- page_private(page) | page_region_mask(offset, length));
- if (page_private(page) == ~0UL)
- SetPageUptodate(page);
-}
-
-STATIC inline int
-test_page_region(
- struct page *page,
- size_t offset,
- size_t length)
-{
- unsigned long mask = page_region_mask(offset, length);
-
- return (mask && (page_private(page) & mask) == mask);
-}
-
-/*
- * Mapping of multi-page buffers into contiguous virtual space
- */
-
-typedef struct a_list {
- void *vm_addr;
- struct a_list *next;
-} a_list_t;
-
-STATIC a_list_t *as_free_head;
-STATIC int as_list_len;
-STATIC DEFINE_SPINLOCK(as_lock);
-
-/*
- * Try to batch vunmaps because they are costly.
- */
-STATIC void
-free_address(
- void *addr)
-{
- a_list_t *aentry;
-
- aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
- if (likely(aentry)) {
- spin_lock(&as_lock);
- aentry->next = as_free_head;
- aentry->vm_addr = addr;
- as_free_head = aentry;
- as_list_len++;
- spin_unlock(&as_lock);
- } else {
- vunmap(addr);
- }
-}
-
-STATIC void
-purge_addresses(void)
-{
- a_list_t *aentry, *old;
-
- if (as_free_head == NULL)
- return;
-
- spin_lock(&as_lock);
- aentry = as_free_head;
- as_free_head = NULL;
- as_list_len = 0;
- spin_unlock(&as_lock);
-
- while ((old = aentry) != NULL) {
- vunmap(aentry->vm_addr);
- aentry = aentry->next;
- kfree(old);
- }
-}
-
-/*
- * Internal xfs_buf_t object manipulation
- */
-
-STATIC void
-_xfs_buf_initialize(
- xfs_buf_t *bp,
- xfs_buftarg_t *target,
- xfs_off_t range_base,
- size_t range_length,
- xfs_buf_flags_t flags)
-{
- /*
- * We don't want certain flags to appear in b_flags.
- */
- flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-
- memset(bp, 0, sizeof(xfs_buf_t));
- atomic_set(&bp->b_hold, 1);
- init_MUTEX_LOCKED(&bp->b_iodonesema);
- INIT_LIST_HEAD(&bp->b_list);
- INIT_LIST_HEAD(&bp->b_hash_list);
- init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
- XB_SET_OWNER(bp);
- bp->b_target = target;
- bp->b_file_offset = range_base;
- /*
- * Set buffer_length and count_desired to the same value initially.
- * I/O routines should use count_desired, which will be the same in
- * most cases but may be reset (e.g. XFS recovery).
- */
- bp->b_buffer_length = bp->b_count_desired = range_length;
- bp->b_flags = flags;
- bp->b_bn = XFS_BUF_DADDR_NULL;
- atomic_set(&bp->b_pin_count, 0);
- init_waitqueue_head(&bp->b_waiters);
-
- XFS_STATS_INC(xb_create);
- XB_TRACE(bp, "initialize", target);
-}
-
-/*
- * Allocate a page array capable of holding a specified number
- * of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
- xfs_buf_t *bp,
- int page_count,
- xfs_buf_flags_t flags)
-{
- /* Make sure that we have a page list */
- if (bp->b_pages == NULL) {
- bp->b_offset = xfs_buf_poff(bp->b_file_offset);
- bp->b_page_count = page_count;
- if (page_count <= XB_PAGES) {
- bp->b_pages = bp->b_page_array;
- } else {
- bp->b_pages = kmem_alloc(sizeof(struct page *) *
- page_count, xb_to_km(flags));
- if (bp->b_pages == NULL)
- return -ENOMEM;
- }
- memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
- }
- return 0;
-}
-
-/*
- * Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
- xfs_buf_t *bp)
-{
- if (bp->b_pages != bp->b_page_array) {
- kmem_free(bp->b_pages,
- bp->b_page_count * sizeof(struct page *));
- }
-}
-
-/*
- * Releases the specified buffer.
- *
- * The modification state of any associated pages is left unchanged.
- * The buffer most not be on any hash - use xfs_buf_rele instead for
- * hashed and refcounted buffers
- */
-void
-xfs_buf_free(
- xfs_buf_t *bp)
-{
- XB_TRACE(bp, "free", 0);
-
- ASSERT(list_empty(&bp->b_hash_list));
-
- if (bp->b_flags & _XBF_PAGE_CACHE) {
- uint i;
-
- if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
- free_address(bp->b_addr - bp->b_offset);
-
- for (i = 0; i < bp->b_page_count; i++)
- page_cache_release(bp->b_pages[i]);
- _xfs_buf_free_pages(bp);
- } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
- /*
- * XXX(hch): bp->b_count_desired might be incorrect (see
- * xfs_buf_associate_memory for details), but fortunately
- * the Linux version of kmem_free ignores the len argument..
- */
- kmem_free(bp->b_addr, bp->b_count_desired);
- _xfs_buf_free_pages(bp);
- }
-
- xfs_buf_deallocate(bp);
-}
-
-/*
- * Finds all pages for buffer in question and builds it's page list.
- */
-STATIC int
-_xfs_buf_lookup_pages(
- xfs_buf_t *bp,
- uint flags)
-{
- struct address_space *mapping = bp->b_target->bt_mapping;
- size_t blocksize = bp->b_target->bt_bsize;
- size_t size = bp->b_count_desired;
- size_t nbytes, offset;
- gfp_t gfp_mask = xb_to_gfp(flags);
- unsigned short page_count, i;
- pgoff_t first;
- xfs_off_t end;
- int error;
-
- end = bp->b_file_offset + bp->b_buffer_length;
- page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-
- error = _xfs_buf_get_pages(bp, page_count, flags);
- if (unlikely(error))
- return error;
- bp->b_flags |= _XBF_PAGE_CACHE;
-
- offset = bp->b_offset;
- first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
-
- for (i = 0; i < bp->b_page_count; i++) {
- struct page *page;
- uint retries = 0;
-
- retry:
- page = find_or_create_page(mapping, first + i, gfp_mask);
- if (unlikely(page == NULL)) {
- if (flags & XBF_READ_AHEAD) {
- bp->b_page_count = i;
- for (i = 0; i < bp->b_page_count; i++)
- unlock_page(bp->b_pages[i]);
- return -ENOMEM;
- }
-
- /*
- * This could deadlock.
- *
- * But until all the XFS lowlevel code is revamped to
- * handle buffer allocation failures we can't do much.
- */
- if (!(++retries % 100))
- printk(KERN_ERR
- "XFS: possible memory allocation "
- "deadlock in %s (mode:0x%x)\n",
- __FUNCTION__, gfp_mask);
-
- XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(0, gfp_mask);
- blk_congestion_wait(WRITE, HZ/50);
- goto retry;
- }
-
- XFS_STATS_INC(xb_page_found);
-
- nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
- size -= nbytes;
-
- if (!PageUptodate(page)) {
- page_count--;
- if (blocksize >= PAGE_CACHE_SIZE) {
- if (flags & XBF_READ)
- bp->b_locked = 1;
- } else if (!PagePrivate(page)) {
- if (test_page_region(page, offset, nbytes))
- page_count++;
- }
- }
-
- bp->b_pages[i] = page;
- offset = 0;
- }
-
- if (!bp->b_locked) {
- for (i = 0; i < bp->b_page_count; i++)
- unlock_page(bp->b_pages[i]);
- }
-
- if (page_count == bp->b_page_count)
- bp->b_flags |= XBF_DONE;
-
- XB_TRACE(bp, "lookup_pages", (long)page_count);
- return error;
-}
-
-/*
- * Map buffer into kernel address-space if nessecary.
- */
-STATIC int
-_xfs_buf_map_pages(
- xfs_buf_t *bp,
- uint flags)
-{
- /* A single page buffer is always mappable */
- if (bp->b_page_count == 1) {
- bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
- bp->b_flags |= XBF_MAPPED;
- } else if (flags & XBF_MAPPED) {
- if (as_list_len > 64)
- purge_addresses();
- bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
- VM_MAP, PAGE_KERNEL);
- if (unlikely(bp->b_addr == NULL))
- return -ENOMEM;
- bp->b_addr += bp->b_offset;
- bp->b_flags |= XBF_MAPPED;
- }
-
- return 0;
-}
-
-/*
- * Finding and Reading Buffers
- */
-
-/*
- * Look up, and creates if absent, a lockable buffer for
- * a given range of an inode. The buffer is returned
- * locked. If other overlapping buffers exist, they are
- * released before the new buffer is created and locked,
- * which may imply that this call will block until those buffers
- * are unlocked. No I/O is implied by this call.
- */
-xfs_buf_t *
-_xfs_buf_find(
- xfs_buftarg_t *btp, /* block device target */
- xfs_off_t ioff, /* starting offset of range */
- size_t isize, /* length of range */
- xfs_buf_flags_t flags,
- xfs_buf_t *new_bp)
-{
- xfs_off_t range_base;
- size_t range_length;
- xfs_bufhash_t *hash;
- xfs_buf_t *bp, *n;
-
- range_base = (ioff << BBSHIFT);
- range_length = (isize << BBSHIFT);
-
- /* Check for IOs smaller than the sector size / not sector aligned */
- ASSERT(!(range_length < (1 << btp->bt_sshift)));
- ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
-
- hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
- spin_lock(&hash->bh_lock);
-
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (bp->b_file_offset == range_base &&
- bp->b_buffer_length == range_length) {
- /*
- * If we look at something, bring it to the
- * front of the list for next time.
- */
- atomic_inc(&bp->b_hold);
- list_move(&bp->b_hash_list, &hash->bh_list);
- goto found;
- }
- }
-
- /* No match found */
- if (new_bp) {
- _xfs_buf_initialize(new_bp, btp, range_base,
- range_length, flags);
- new_bp->b_hash = hash;
- list_add(&new_bp->b_hash_list, &hash->bh_list);
- } else {
- XFS_STATS_INC(xb_miss_locked);
- }
-
- spin_unlock(&hash->bh_lock);
- return new_bp;
-
-found:
- spin_unlock(&hash->bh_lock);
-
- /* Attempt to get the semaphore without sleeping,
- * if this does not work then we need to drop the
- * spinlock and do a hard attempt on the semaphore.
- */
- if (down_trylock(&bp->b_sema)) {
- if (!(flags & XBF_TRYLOCK)) {
- /* wait for buffer ownership */
- XB_TRACE(bp, "get_lock", 0);
- xfs_buf_lock(bp);
- XFS_STATS_INC(xb_get_locked_waited);
- } else {
- /* We asked for a trylock and failed, no need
- * to look at file offset and length here, we
- * know that this buffer at least overlaps our
- * buffer and is locked, therefore our buffer
- * either does not exist, or is this buffer.
- */
- xfs_buf_rele(bp);
- XFS_STATS_INC(xb_busy_locked);
- return NULL;
- }
- } else {
- /* trylock worked */
- XB_SET_OWNER(bp);
- }
-
- if (bp->b_flags & XBF_STALE) {
- ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
- bp->b_flags &= XBF_MAPPED;
- }
- XB_TRACE(bp, "got_lock", 0);
- XFS_STATS_INC(xb_get_locked);
- return bp;
-}
-
-/*
- * Assembles a buffer covering the specified range.
- * Storage in memory for all portions of the buffer will be allocated,
- * although backing storage may not be.
- */
-xfs_buf_t *
-xfs_buf_get_flags(
- xfs_buftarg_t *target,/* target for buffer */
- xfs_off_t ioff, /* starting offset of range */
- size_t isize, /* length of range */
- xfs_buf_flags_t flags)
-{
- xfs_buf_t *bp, *new_bp;
- int error = 0, i;
-
- new_bp = xfs_buf_allocate(flags);
- if (unlikely(!new_bp))
- return NULL;
-
- bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
- if (bp == new_bp) {
- error = _xfs_buf_lookup_pages(bp, flags);
- if (error)
- goto no_buffer;
- } else {
- xfs_buf_deallocate(new_bp);
- if (unlikely(bp == NULL))
- return NULL;
- }
-
- for (i = 0; i < bp->b_page_count; i++)
- mark_page_accessed(bp->b_pages[i]);
-
- if (!(bp->b_flags & XBF_MAPPED)) {
- error = _xfs_buf_map_pages(bp, flags);
- if (unlikely(error)) {
- printk(KERN_WARNING "%s: failed to map pages\n",
- __FUNCTION__);
- goto no_buffer;
- }
- }
-
- XFS_STATS_INC(xb_get);
-
- /*
- * Always fill in the block number now, the mapped cases can do
- * their own overlay of this later.
- */
- bp->b_bn = ioff;
- bp->b_count_desired = bp->b_buffer_length;
-
- XB_TRACE(bp, "get", (unsigned long)flags);
- return bp;
-
- no_buffer:
- if (flags & (XBF_LOCK | XBF_TRYLOCK))
- xfs_buf_unlock(bp);
- xfs_buf_rele(bp);
- return NULL;
-}
-
-xfs_buf_t *
-xfs_buf_read_flags(
- xfs_buftarg_t *target,
- xfs_off_t ioff,
- size_t isize,
- xfs_buf_flags_t flags)
-{
- xfs_buf_t *bp;
-
- flags |= XBF_READ;
-
- bp = xfs_buf_get_flags(target, ioff, isize, flags);
- if (bp) {
- if (!XFS_BUF_ISDONE(bp)) {
- XB_TRACE(bp, "read", (unsigned long)flags);
- XFS_STATS_INC(xb_get_read);
- xfs_buf_iostart(bp, flags);
- } else if (flags & XBF_ASYNC) {
- XB_TRACE(bp, "read_async", (unsigned long)flags);
- /*
- * Read ahead call which is already satisfied,
- * drop the buffer
- */
- goto no_buffer;
- } else {
- XB_TRACE(bp, "read_done", (unsigned long)flags);
- /* We do not want read in the flags */
- bp->b_flags &= ~XBF_READ;
- }
- }
-
- return bp;
-
- no_buffer:
- if (flags & (XBF_LOCK | XBF_TRYLOCK))
- xfs_buf_unlock(bp);
- xfs_buf_rele(bp);
- return NULL;
-}
-
-/*
- * If we are not low on memory then do the readahead in a deadlock
- * safe manner.
- */
-void
-xfs_buf_readahead(
- xfs_buftarg_t *target,
- xfs_off_t ioff,
- size_t isize,
- xfs_buf_flags_t flags)
-{
- struct backing_dev_info *bdi;
-
- bdi = target->bt_mapping->backing_dev_info;
- if (bdi_read_congested(bdi))
- return;
-
- flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
- xfs_buf_read_flags(target, ioff, isize, flags);
-}
-
-xfs_buf_t *
-xfs_buf_get_empty(
- size_t len,
- xfs_buftarg_t *target)
-{
- xfs_buf_t *bp;
-
- bp = xfs_buf_allocate(0);
- if (bp)
- _xfs_buf_initialize(bp, target, 0, len, 0);
- return bp;
-}
-
-static inline struct page *
-mem_to_page(
- void *addr)
-{
- if (((unsigned long)addr < VMALLOC_START) ||
- ((unsigned long)addr >= VMALLOC_END)) {
- return virt_to_page(addr);
- } else {
- return vmalloc_to_page(addr);
- }
-}
-
-int
-xfs_buf_associate_memory(
- xfs_buf_t *bp,
- void *mem,
- size_t len)
-{
- int rval;
- int i = 0;
- size_t ptr;
- size_t end, end_cur;
- off_t offset;
- int page_count;
-
- page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
- offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK);
- if (offset && (len > PAGE_CACHE_SIZE))
- page_count++;
-
- /* Free any previous set of page pointers */
- if (bp->b_pages)
- _xfs_buf_free_pages(bp);
-
- bp->b_pages = NULL;
- bp->b_addr = mem;
-
- rval = _xfs_buf_get_pages(bp, page_count, 0);
- if (rval)
- return rval;
-
- bp->b_offset = offset;
- ptr = (size_t) mem & PAGE_CACHE_MASK;
- end = PAGE_CACHE_ALIGN((size_t) mem + len);
- end_cur = end;
- /* set up first page */
- bp->b_pages[0] = mem_to_page(mem);
-
- ptr += PAGE_CACHE_SIZE;
- bp->b_page_count = ++i;
- while (ptr < end) {
- bp->b_pages[i] = mem_to_page((void *)ptr);
- bp->b_page_count = ++i;
- ptr += PAGE_CACHE_SIZE;
- }
- bp->b_locked = 0;
-
- bp->b_count_desired = bp->b_buffer_length = len;
- bp->b_flags |= XBF_MAPPED;
-
- return 0;
-}
-
-xfs_buf_t *
-xfs_buf_get_noaddr(
- size_t len,
- xfs_buftarg_t *target)
-{
- size_t malloc_len = len;
- xfs_buf_t *bp;
- void *data;
- int error;
-
- bp = xfs_buf_allocate(0);
- if (unlikely(bp == NULL))
- goto fail;
- _xfs_buf_initialize(bp, target, 0, len, 0);
-
- try_again:
- data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
- if (unlikely(data == NULL))
- goto fail_free_buf;
-
- /* check whether alignment matches.. */
- if ((__psunsigned_t)data !=
- ((__psunsigned_t)data & ~target->bt_smask)) {
- /* .. else double the size and try again */
- kmem_free(data, malloc_len);
- malloc_len <<= 1;
- goto try_again;
- }
-
- error = xfs_buf_associate_memory(bp, data, len);
- if (error)
- goto fail_free_mem;
- bp->b_flags |= _XBF_KMEM_ALLOC;
-
- xfs_buf_unlock(bp);
-
- XB_TRACE(bp, "no_daddr", data);
- return bp;
- fail_free_mem:
- kmem_free(data, malloc_len);
- fail_free_buf:
- xfs_buf_free(bp);
- fail:
- return NULL;
-}
-
-/*
- * Increment reference count on buffer, to hold the buffer concurrently
- * with another thread which may release (free) the buffer asynchronously.
- * Must hold the buffer already to call this function.
- */
-void
-xfs_buf_hold(
- xfs_buf_t *bp)
-{
- atomic_inc(&bp->b_hold);
- XB_TRACE(bp, "hold", 0);
-}
-
-/*
- * Releases a hold on the specified buffer. If the
- * the hold count is 1, calls xfs_buf_free.
- */
-void
-xfs_buf_rele(
- xfs_buf_t *bp)
-{
- xfs_bufhash_t *hash = bp->b_hash;
-
- XB_TRACE(bp, "rele", bp->b_relse);
-
- if (unlikely(!hash)) {
- ASSERT(!bp->b_relse);
- if (atomic_dec_and_test(&bp->b_hold))
- xfs_buf_free(bp);
- return;
- }
-
- if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
- if (bp->b_relse) {
- atomic_inc(&bp->b_hold);
- spin_unlock(&hash->bh_lock);
- (*(bp->b_relse)) (bp);
- } else if (bp->b_flags & XBF_FS_MANAGED) {
- spin_unlock(&hash->bh_lock);
- } else {
- ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
- list_del_init(&bp->b_hash_list);
- spin_unlock(&hash->bh_lock);
- xfs_buf_free(bp);
- }
- } else {
- /*
- * Catch reference count leaks
- */
- ASSERT(atomic_read(&bp->b_hold) >= 0);
- }
-}
-
-
-/*
- * Mutual exclusion on buffers. Locking model:
- *
- * Buffers associated with inodes for which buffer locking
- * is not enabled are not protected by semaphores, and are
- * assumed to be exclusively owned by the caller. There is a
- * spinlock in the buffer, used by the caller when concurrent
- * access is possible.
- */
-
-/*
- * Locks a buffer object, if it is not already locked.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
- */
-int
-xfs_buf_cond_lock(
- xfs_buf_t *bp)
-{
- int locked;
-
- locked = down_trylock(&bp->b_sema) == 0;
- if (locked) {
- XB_SET_OWNER(bp);
- }
- XB_TRACE(bp, "cond_lock", (long)locked);
- return locked ? 0 : -EBUSY;
-}
-
-#if defined(DEBUG) || defined(XFS_BLI_TRACE)
-int
-xfs_buf_lock_value(
- xfs_buf_t *bp)
-{
- return atomic_read(&bp->b_sema.count);
-}
-#endif
-
-/*
- * Locks a buffer object.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
- */
-void
-xfs_buf_lock(
- xfs_buf_t *bp)
-{
- XB_TRACE(bp, "lock", 0);
- if (atomic_read(&bp->b_io_remaining))
- blk_run_address_space(bp->b_target->bt_mapping);
- down(&bp->b_sema);
- XB_SET_OWNER(bp);
- XB_TRACE(bp, "locked", 0);
-}
-
-/*
- * Releases the lock on the buffer object.
- * If the buffer is marked delwri but is not queued, do so before we
- * unlock the buffer as we need to set flags correctly. We also need to
- * take a reference for the delwri queue because the unlocker is going to
- * drop their's and they don't know we just queued it.
- */
-void
-xfs_buf_unlock(
- xfs_buf_t *bp)
-{
- if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
- atomic_inc(&bp->b_hold);
- bp->b_flags |= XBF_ASYNC;
- xfs_buf_delwri_queue(bp, 0);
- }
-
- XB_CLEAR_OWNER(bp);
- up(&bp->b_sema);
- XB_TRACE(bp, "unlock", 0);
-}
-
-
-/*
- * Pinning Buffer Storage in Memory
- * Ensure that no attempt to force a buffer to disk will succeed.
- */
-void
-xfs_buf_pin(
- xfs_buf_t *bp)
-{
- atomic_inc(&bp->b_pin_count);
- XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
-}
-
-void
-xfs_buf_unpin(
- xfs_buf_t *bp)
-{
- if (atomic_dec_and_test(&bp->b_pin_count))
- wake_up_all(&bp->b_waiters);
- XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
-}
-
-int
-xfs_buf_ispin(
- xfs_buf_t *bp)
-{
- return atomic_read(&bp->b_pin_count);
-}
-
-STATIC void
-xfs_buf_wait_unpin(
- xfs_buf_t *bp)
-{
- DECLARE_WAITQUEUE (wait, current);
-
- if (atomic_read(&bp->b_pin_count) == 0)
- return;
-
- add_wait_queue(&bp->b_waiters, &wait);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&bp->b_pin_count) == 0)
- break;
- if (atomic_read(&bp->b_io_remaining))
- blk_run_address_space(bp->b_target->bt_mapping);
- schedule();
- }
- remove_wait_queue(&bp->b_waiters, &wait);
- set_current_state(TASK_RUNNING);
-}
-
-/*
- * Buffer Utility Routines
- */
-
-STATIC void
-xfs_buf_iodone_work(
- void *v)
-{
- xfs_buf_t *bp = (xfs_buf_t *)v;
-
- if (bp->b_iodone)
- (*(bp->b_iodone))(bp);
- else if (bp->b_flags & XBF_ASYNC)
- xfs_buf_relse(bp);
-}
-
-void
-xfs_buf_ioend(
- xfs_buf_t *bp,
- int schedule)
-{
- bp->b_flags &= ~(XBF_READ | XBF_WRITE);
- if (bp->b_error == 0)
- bp->b_flags |= XBF_DONE;
-
- XB_TRACE(bp, "iodone", bp->b_iodone);
-
- if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
- if (schedule) {
- INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
- queue_work(xfslogd_workqueue, &bp->b_iodone_work);
- } else {
- xfs_buf_iodone_work(bp);
- }
- } else {
- up(&bp->b_iodonesema);
- }
-}
-
-void
-xfs_buf_ioerror(
- xfs_buf_t *bp,
- int error)
-{
- ASSERT(error >= 0 && error <= 0xffff);
- bp->b_error = (unsigned short)error;
- XB_TRACE(bp, "ioerror", (unsigned long)error);
-}
-
-/*
- * Initiate I/O on a buffer, based on the flags supplied.
- * The b_iodone routine in the buffer supplied will only be called
- * when all of the subsidiary I/O requests, if any, have been completed.
- */
-int
-xfs_buf_iostart(
- xfs_buf_t *bp,
- xfs_buf_flags_t flags)
-{
- int status = 0;
-
- XB_TRACE(bp, "iostart", (unsigned long)flags);
-
- if (flags & XBF_DELWRI) {
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
- bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
- xfs_buf_delwri_queue(bp, 1);
- return status;
- }
-
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
- bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
-
- BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
-
- /* For writes allow an alternate strategy routine to precede
- * the actual I/O request (which may not be issued at all in
- * a shutdown situation, for example).
- */
- status = (flags & XBF_WRITE) ?
- xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
-
- /* Wait for I/O if we are not an async request.
- * Note: async I/O request completion will release the buffer,
- * and that can already be done by this point. So using the
- * buffer pointer from here on, after async I/O, is invalid.
- */
- if (!status && !(flags & XBF_ASYNC))
- status = xfs_buf_iowait(bp);
-
- return status;
-}
-
-STATIC __inline__ int
-_xfs_buf_iolocked(
- xfs_buf_t *bp)
-{
- ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
- if (bp->b_flags & XBF_READ)
- return bp->b_locked;
- return 0;
-}
-
-STATIC __inline__ void
-_xfs_buf_ioend(
- xfs_buf_t *bp,
- int schedule)
-{
- if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
- bp->b_locked = 0;
- xfs_buf_ioend(bp, schedule);
- }
-}
-
-STATIC int
-xfs_buf_bio_end_io(
- struct bio *bio,
- unsigned int bytes_done,
- int error)
-{
- xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
- unsigned int blocksize = bp->b_target->bt_bsize;
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-
- if (bio->bi_size)
- return 1;
-
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- bp->b_error = EIO;
-
- do {
- struct page *page = bvec->bv_page;
-
- if (unlikely(bp->b_error)) {
- if (bp->b_flags & XBF_READ)
- ClearPageUptodate(page);
- SetPageError(page);
- } else if (blocksize >= PAGE_CACHE_SIZE) {
- SetPageUptodate(page);
- } else if (!PagePrivate(page) &&
- (bp->b_flags & _XBF_PAGE_CACHE)) {
- set_page_region(page, bvec->bv_offset, bvec->bv_len);
- }
-
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
- if (_xfs_buf_iolocked(bp)) {
- unlock_page(page);
- }
- } while (bvec >= bio->bi_io_vec);
-
- _xfs_buf_ioend(bp, 1);
- bio_put(bio);
- return 0;
-}
-
-STATIC void
-_xfs_buf_ioapply(
- xfs_buf_t *bp)
-{
- int i, rw, map_i, total_nr_pages, nr_pages;
- struct bio *bio;
- int offset = bp->b_offset;
- int size = bp->b_count_desired;
- sector_t sector = bp->b_bn;
- unsigned int blocksize = bp->b_target->bt_bsize;
- int locking = _xfs_buf_iolocked(bp);
-
- total_nr_pages = bp->b_page_count;
- map_i = 0;
-
- if (bp->b_flags & _XBF_RUN_QUEUES) {
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
- } else {
- rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
- }
-
- if (bp->b_flags & XBF_ORDERED) {
- ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_BARRIER;
- }
-
- /* Special code path for reading a sub page size buffer in --
- * we populate up the whole page, and hence the other metadata
- * in the same page. This optimization is only valid when the
- * filesystem block size is not smaller than the page size.
- */
- if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
- (bp->b_flags & XBF_READ) && locking &&
- (blocksize >= PAGE_CACHE_SIZE)) {
- bio = bio_alloc(GFP_NOIO, 1);
-
- bio->bi_bdev = bp->b_target->bt_bdev;
- bio->bi_sector = sector - (offset >> BBSHIFT);
- bio->bi_end_io = xfs_buf_bio_end_io;
- bio->bi_private = bp;
-
- bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
- size = 0;
-
- atomic_inc(&bp->b_io_remaining);
-
- goto submit_io;
- }
-
- /* Lock down the pages which we need to for the request */
- if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
- for (i = 0; size; i++) {
- int nbytes = PAGE_CACHE_SIZE - offset;
- struct page *page = bp->b_pages[i];
-
- if (nbytes > size)
- nbytes = size;
-
- lock_page(page);
-
- size -= nbytes;
- offset = 0;
- }
- offset = bp->b_offset;
- size = bp->b_count_desired;
- }
-
-next_chunk:
- atomic_inc(&bp->b_io_remaining);
- nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
- if (nr_pages > total_nr_pages)
- nr_pages = total_nr_pages;
-
- bio = bio_alloc(GFP_NOIO, nr_pages);
- bio->bi_bdev = bp->b_target->bt_bdev;
- bio->bi_sector = sector;
- bio->bi_end_io = xfs_buf_bio_end_io;
- bio->bi_private = bp;
-
- for (; size && nr_pages; nr_pages--, map_i++) {
- int rbytes, nbytes = PAGE_CACHE_SIZE - offset;
-
- if (nbytes > size)
- nbytes = size;
-
- rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
- if (rbytes < nbytes)
- break;
-
- offset = 0;
- sector += nbytes >> BBSHIFT;
- size -= nbytes;
- total_nr_pages--;
- }
-
-submit_io:
- if (likely(bio->bi_size)) {
- submit_bio(rw, bio);
- if (size)
- goto next_chunk;
- } else {
- bio_put(bio);
- xfs_buf_ioerror(bp, EIO);
- }
-}
-
-int
-xfs_buf_iorequest(
- xfs_buf_t *bp)
-{
- XB_TRACE(bp, "iorequest", 0);
-
- if (bp->b_flags & XBF_DELWRI) {
- xfs_buf_delwri_queue(bp, 1);
- return 0;
- }
-
- if (bp->b_flags & XBF_WRITE) {
- xfs_buf_wait_unpin(bp);
- }
-
- xfs_buf_hold(bp);
-
- /* Set the count to 1 initially, this will stop an I/O
- * completion callout which happens before we have started
- * all the I/O from calling xfs_buf_ioend too early.
- */
- atomic_set(&bp->b_io_remaining, 1);
- _xfs_buf_ioapply(bp);
- _xfs_buf_ioend(bp, 0);
-
- xfs_buf_rele(bp);
- return 0;
-}
-
-/*
- * Waits for I/O to complete on the buffer supplied.
- * It returns immediately if no I/O is pending.
- * It returns the I/O error code, if any, or 0 if there was no error.
- */
-int
-xfs_buf_iowait(
- xfs_buf_t *bp)
-{
- XB_TRACE(bp, "iowait", 0);
- if (atomic_read(&bp->b_io_remaining))
- blk_run_address_space(bp->b_target->bt_mapping);
- down(&bp->b_iodonesema);
- XB_TRACE(bp, "iowaited", (long)bp->b_error);
- return bp->b_error;
-}
-
-xfs_caddr_t
-xfs_buf_offset(
- xfs_buf_t *bp,
- size_t offset)
-{
- struct page *page;
-
- if (bp->b_flags & XBF_MAPPED)
- return XFS_BUF_PTR(bp) + offset;
-
- offset += bp->b_offset;
- page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
- return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
-}
-
-/*
- * Move data into or out of a buffer.
- */
-void
-xfs_buf_iomove(
- xfs_buf_t *bp, /* buffer to process */
- size_t boff, /* starting buffer offset */
- size_t bsize, /* length to copy */
- caddr_t data, /* data address */
- xfs_buf_rw_t mode) /* read/write/zero flag */
-{
- size_t bend, cpoff, csize;
- struct page *page;
-
- bend = boff + bsize;
- while (boff < bend) {
- page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
- cpoff = xfs_buf_poff(boff + bp->b_offset);
- csize = min_t(size_t,
- PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
-
- ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
-
- switch (mode) {
- case XBRW_ZERO:
- memset(page_address(page) + cpoff, 0, csize);
- break;
- case XBRW_READ:
- memcpy(data, page_address(page) + cpoff, csize);
- break;
- case XBRW_WRITE:
- memcpy(page_address(page) + cpoff, data, csize);
- }
-
- boff += csize;
- data += csize;
- }
-}
-
-/*
- * Handling of buffer targets (buftargs).
- */
-
-/*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
- */
-void
-xfs_wait_buftarg(
- xfs_buftarg_t *btp)
-{
- xfs_buf_t *bp, *n;
- xfs_bufhash_t *hash;
- uint i;
-
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- hash = &btp->bt_hash[i];
-again:
- spin_lock(&hash->bh_lock);
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (!(bp->b_flags & XBF_FS_MANAGED)) {
- spin_unlock(&hash->bh_lock);
- /*
- * Catch superblock reference count leaks
- * immediately
- */
- BUG_ON(bp->b_bn == 0);
- delay(100);
- goto again;
- }
- }
- spin_unlock(&hash->bh_lock);
- }
-}
-
-/*
- * Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
- xfs_buftarg_t *btp,
- int external)
-{
- unsigned int i;
-
- btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
- btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
- btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
- sizeof(xfs_bufhash_t), KM_SLEEP);
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- spin_lock_init(&btp->bt_hash[i].bh_lock);
- INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
- }
-}
-
-STATIC void
-xfs_free_bufhash(
- xfs_buftarg_t *btp)
-{
- kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
- btp->bt_hash = NULL;
-}
-
-/*
- * buftarg list for delwrite queue processing
- */
-STATIC LIST_HEAD(xfs_buftarg_list);
-STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
-
-STATIC void
-xfs_register_buftarg(
- xfs_buftarg_t *btp)
-{
- spin_lock(&xfs_buftarg_lock);
- list_add(&btp->bt_list, &xfs_buftarg_list);
- spin_unlock(&xfs_buftarg_lock);
-}
-
-STATIC void
-xfs_unregister_buftarg(
- xfs_buftarg_t *btp)
-{
- spin_lock(&xfs_buftarg_lock);
- list_del(&btp->bt_list);
- spin_unlock(&xfs_buftarg_lock);
-}
-
-void
-xfs_free_buftarg(
- xfs_buftarg_t *btp,
- int external)
-{
- xfs_flush_buftarg(btp, 1);
- if (external)
- xfs_blkdev_put(btp->bt_bdev);
- xfs_free_bufhash(btp);
- iput(btp->bt_mapping->host);
-
- /* Unregister the buftarg first so that we don't get a
- * wakeup finding a non-existent task
- */
- xfs_unregister_buftarg(btp);
- kthread_stop(btp->bt_task);
-
- kmem_free(btp, sizeof(*btp));
-}
-
-STATIC int
-xfs_setsize_buftarg_flags(
- xfs_buftarg_t *btp,
- unsigned int blocksize,
- unsigned int sectorsize,
- int verbose)
-{
- btp->bt_bsize = blocksize;
- btp->bt_sshift = ffs(sectorsize) - 1;
- btp->bt_smask = sectorsize - 1;
-
- if (set_blocksize(btp->bt_bdev, sectorsize)) {
- printk(KERN_WARNING
- "XFS: Cannot set_blocksize to %u on device %s\n",
- sectorsize, XFS_BUFTARG_NAME(btp));
- return EINVAL;
- }
-
- if (verbose &&
- (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
- printk(KERN_WARNING
- "XFS: %u byte sectors in use on device %s. "
- "This is suboptimal; %u or greater is ideal.\n",
- sectorsize, XFS_BUFTARG_NAME(btp),
- (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
- }
-
- return 0;
-}
-
-/*
- * When allocating the initial buffer target we have not yet
- * read in the superblock, so don't know what sized sectors
- * are being used is at this early stage. Play safe.
- */
-STATIC int
-xfs_setsize_buftarg_early(
- xfs_buftarg_t *btp,
- struct block_device *bdev)
-{
- return xfs_setsize_buftarg_flags(btp,
- PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
-}
-
-int
-xfs_setsize_buftarg(
- xfs_buftarg_t *btp,
- unsigned int blocksize,
- unsigned int sectorsize)
-{
- return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
-}
-
-STATIC int
-xfs_mapping_buftarg(
- xfs_buftarg_t *btp,
- struct block_device *bdev)
-{
- struct backing_dev_info *bdi;
- struct inode *inode;
- struct address_space *mapping;
- static struct address_space_operations mapping_aops = {
- .sync_page = block_sync_page,
- .migratepage = fail_migrate_page,
- };
-
- inode = new_inode(bdev->bd_inode->i_sb);
- if (!inode) {
- printk(KERN_WARNING
- "XFS: Cannot allocate mapping inode for device %s\n",
- XFS_BUFTARG_NAME(btp));
- return ENOMEM;
- }
- inode->i_mode = S_IFBLK;
- inode->i_bdev = bdev;
- inode->i_rdev = bdev->bd_dev;
- bdi = blk_get_backing_dev_info(bdev);
- if (!bdi)
- bdi = &default_backing_dev_info;
- mapping = &inode->i_data;
- mapping->a_ops = &mapping_aops;
- mapping->backing_dev_info = bdi;
- mapping_set_gfp_mask(mapping, GFP_NOFS);
- btp->bt_mapping = mapping;
- return 0;
-}
-
-STATIC int
-xfs_alloc_delwrite_queue(
- xfs_buftarg_t *btp)
-{
- int error = 0;
-
- INIT_LIST_HEAD(&btp->bt_list);
- INIT_LIST_HEAD(&btp->bt_delwrite_queue);
- spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
- btp->bt_flags = 0;
- btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
- if (IS_ERR(btp->bt_task)) {
- error = PTR_ERR(btp->bt_task);
- goto out_error;
- }
- xfs_register_buftarg(btp);
-out_error:
- return error;
-}
-
-xfs_buftarg_t *
-xfs_alloc_buftarg(
- struct block_device *bdev,
- int external)
-{
- xfs_buftarg_t *btp;
-
- btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-
- btp->bt_dev = bdev->bd_dev;
- btp->bt_bdev = bdev;
- if (xfs_setsize_buftarg_early(btp, bdev))
- goto error;
- if (xfs_mapping_buftarg(btp, bdev))
- goto error;
- if (xfs_alloc_delwrite_queue(btp))
- goto error;
- xfs_alloc_bufhash(btp, external);
- return btp;
-
-error:
- kmem_free(btp, sizeof(*btp));
- return NULL;
-}
-
-
-/*
- * Delayed write buffer handling
- */
-STATIC void
-xfs_buf_delwri_queue(
- xfs_buf_t *bp,
- int unlock)
-{
- struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
- spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
-
- XB_TRACE(bp, "delwri_q", (long)unlock);
- ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-
- spin_lock(dwlk);
- /* If already in the queue, dequeue and place at tail */
- if (!list_empty(&bp->b_list)) {
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
- if (unlock)
- atomic_dec(&bp->b_hold);
- list_del(&bp->b_list);
- }
-
- bp->b_flags |= _XBF_DELWRI_Q;
- list_add_tail(&bp->b_list, dwq);
- bp->b_queuetime = jiffies;
- spin_unlock(dwlk);
-
- if (unlock)
- xfs_buf_unlock(bp);
-}
-
-void
-xfs_buf_delwri_dequeue(
- xfs_buf_t *bp)
-{
- spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
- int dequeued = 0;
-
- spin_lock(dwlk);
- if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
- list_del_init(&bp->b_list);
- dequeued = 1;
- }
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
- spin_unlock(dwlk);
-
- if (dequeued)
- xfs_buf_rele(bp);
-
- XB_TRACE(bp, "delwri_dq", (long)dequeued);
-}
-
-STATIC void
-xfs_buf_runall_queues(
- struct workqueue_struct *queue)
-{
- flush_workqueue(queue);
-}
-
-STATIC int
-xfsbufd_wakeup(
- int priority,
- gfp_t mask)
-{
- xfs_buftarg_t *btp;
-
- spin_lock(&xfs_buftarg_lock);
- list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
- if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
- continue;
- set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
- wake_up_process(btp->bt_task);
- }
- spin_unlock(&xfs_buftarg_lock);
- return 0;
-}
-
-STATIC int
-xfsbufd(
- void *data)
-{
- struct list_head tmp;
- unsigned long age;
- xfs_buftarg_t *target = (xfs_buftarg_t *)data;
- xfs_buf_t *bp, *n;
- struct list_head *dwq = &target->bt_delwrite_queue;
- spinlock_t *dwlk = &target->bt_delwrite_lock;
-
- current->flags |= PF_MEMALLOC;
-
- INIT_LIST_HEAD(&tmp);
- do {
- if (unlikely(freezing(current))) {
- set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
- refrigerator();
- } else {
- clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
- }
-
- schedule_timeout_interruptible(
- xfs_buf_timer_centisecs * msecs_to_jiffies(10));
-
- age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
- spin_lock(dwlk);
- list_for_each_entry_safe(bp, n, dwq, b_list) {
- XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
- ASSERT(bp->b_flags & XBF_DELWRI);
-
- if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
- if (!test_bit(XBT_FORCE_FLUSH,
- &target->bt_flags) &&
- time_before(jiffies,
- bp->b_queuetime + age)) {
- xfs_buf_unlock(bp);
- break;
- }
-
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
- bp->b_flags |= XBF_WRITE;
- list_move(&bp->b_list, &tmp);
- }
- }
- spin_unlock(dwlk);
-
- while (!list_empty(&tmp)) {
- bp = list_entry(tmp.next, xfs_buf_t, b_list);
- ASSERT(target == bp->b_target);
-
- list_del_init(&bp->b_list);
- xfs_buf_iostrategy(bp);
-
- blk_run_address_space(target->bt_mapping);
- }
-
- if (as_list_len > 0)
- purge_addresses();
-
- clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
- } while (!kthread_should_stop());
-
- return 0;
-}
-
-/*
- * Go through all incore buffers, and release buffers if they belong to
- * the given device. This is used in filesystem error handling to
- * preserve the consistency of its metadata.
- */
-int
-xfs_flush_buftarg(
- xfs_buftarg_t *target,
- int wait)
-{
- struct list_head tmp;
- xfs_buf_t *bp, *n;
- int pincount = 0;
- struct list_head *dwq = &target->bt_delwrite_queue;
- spinlock_t *dwlk = &target->bt_delwrite_lock;
-
- xfs_buf_runall_queues(xfsdatad_workqueue);
- xfs_buf_runall_queues(xfslogd_workqueue);
-
- INIT_LIST_HEAD(&tmp);
- spin_lock(dwlk);
- list_for_each_entry_safe(bp, n, dwq, b_list) {
- ASSERT(bp->b_target == target);
- ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
- XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
- if (xfs_buf_ispin(bp)) {
- pincount++;
- continue;
- }
-
- list_move(&bp->b_list, &tmp);
- }
- spin_unlock(dwlk);
-
- /*
- * Dropped the delayed write list lock, now walk the temporary list
- */
- list_for_each_entry_safe(bp, n, &tmp, b_list) {
- xfs_buf_lock(bp);
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
- bp->b_flags |= XBF_WRITE;
- if (wait)
- bp->b_flags &= ~XBF_ASYNC;
- else
- list_del_init(&bp->b_list);
-
- xfs_buf_iostrategy(bp);
- }
-
- /*
- * Remaining list items must be flushed before returning
- */
- while (!list_empty(&tmp)) {
- bp = list_entry(tmp.next, xfs_buf_t, b_list);
-
- list_del_init(&bp->b_list);
- xfs_iowait(bp);
- xfs_buf_relse(bp);
- }
-
- if (wait)
- blk_run_address_space(target->bt_mapping);
-
- return pincount;
-}
-
-int __init
-xfs_buf_init(void)
-{
- int error = -ENOMEM;
-
-#ifdef XFS_BUF_TRACE
- xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
-#endif
-
- xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
- if (!xfs_buf_zone)
- goto out_free_trace_buf;
-
- xfslogd_workqueue = create_workqueue("xfslogd");
- if (!xfslogd_workqueue)
- goto out_free_buf_zone;
-
- xfsdatad_workqueue = create_workqueue("xfsdatad");
- if (!xfsdatad_workqueue)
- goto out_destroy_xfslogd_workqueue;
-
- xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
- if (!xfs_buf_shake)
- goto out_destroy_xfsdatad_workqueue;
-
- return 0;
-
- out_destroy_xfsdatad_workqueue:
- destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
- destroy_workqueue(xfslogd_workqueue);
- out_free_buf_zone:
- kmem_zone_destroy(xfs_buf_zone);
- out_free_trace_buf:
-#ifdef XFS_BUF_TRACE
- ktrace_free(xfs_buf_trace_buf);
-#endif
- return error;
-}
-
-void
-xfs_buf_terminate(void)
-{
- kmem_shake_deregister(xfs_buf_shake);
- destroy_workqueue(xfsdatad_workqueue);
- destroy_workqueue(xfslogd_workqueue);
- kmem_zone_destroy(xfs_buf_zone);
-#ifdef XFS_BUF_TRACE
- ktrace_free(xfs_buf_trace_buf);
-#endif
-}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
deleted file mode 100644
index 4dd6592d5a4..00000000000
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_BUF_H__
-#define __XFS_BUF_H__
-
-#include <linux/config.h>
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/uio.h>
-
-/*
- * Base types
- */
-
-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
-
-#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
-#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum {
- XBRW_READ = 1, /* transfer into target memory */
- XBRW_WRITE = 2, /* transfer from target memory */
- XBRW_ZERO = 3, /* Zero target memory */
-} xfs_buf_rw_t;
-
-typedef enum {
- XBF_READ = (1 << 0), /* buffer intended for reading from device */
- XBF_WRITE = (1 << 1), /* buffer intended for writing to device */
- XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */
- XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
- XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */
- XBF_DELWRI = (1 << 6), /* buffer has dirty pages */
- XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
- XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
- XBF_ORDERED = (1 << 11), /* use ordered writes */
- XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
-
- /* flags used only as arguments to access routines */
- XBF_LOCK = (1 << 14), /* lock requested */
- XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
- XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
-
- /* flags used only internally */
- _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
- _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
- _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
- _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
-} xfs_buf_flags_t;
-
-typedef enum {
- XBT_FORCE_SLEEP = (0 << 1),
- XBT_FORCE_FLUSH = (1 << 1),
-} xfs_buftarg_flags_t;
-
-typedef struct xfs_bufhash {
- struct list_head bh_list;
- spinlock_t bh_lock;
-} xfs_bufhash_t;
-
-typedef struct xfs_buftarg {
- dev_t bt_dev;
- struct block_device *bt_bdev;
- struct address_space *bt_mapping;
- unsigned int bt_bsize;
- unsigned int bt_sshift;
- size_t bt_smask;
-
- /* per device buffer hash table */
- uint bt_hashmask;
- uint bt_hashshift;
- xfs_bufhash_t *bt_hash;
-
- /* per device delwri queue */
- struct task_struct *bt_task;
- struct list_head bt_list;
- struct list_head bt_delwrite_queue;
- spinlock_t bt_delwrite_lock;
- unsigned long bt_flags;
-} xfs_buftarg_t;
-
-/*
- * xfs_buf_t: Buffer structure for pagecache-based buffers
- *
- * This buffer structure is used by the pagecache buffer management routines
- * to refer to an assembly of pages forming a logical buffer.
- *
- * The buffer structure is used on a temporary basis only, and discarded when
- * released. The real data storage is recorded in the pagecache. Buffers are
- * hashed to the block device on which the file system resides.
- */
-
-struct xfs_buf;
-typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
-typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
-typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
-
-#define XB_PAGES 2
-
-typedef struct xfs_buf {
- struct semaphore b_sema; /* semaphore for lockables */
- unsigned long b_queuetime; /* time buffer was queued */
- atomic_t b_pin_count; /* pin count */
- wait_queue_head_t b_waiters; /* unpin waiters */
- struct list_head b_list;
- xfs_buf_flags_t b_flags; /* status flags */
- struct list_head b_hash_list; /* hash table list */
- xfs_bufhash_t *b_hash; /* hash table list start */
- xfs_buftarg_t *b_target; /* buffer target (device) */
- atomic_t b_hold; /* reference count */
- xfs_daddr_t b_bn; /* block number for I/O */
- xfs_off_t b_file_offset; /* offset in file */
- size_t b_buffer_length;/* size of buffer in bytes */
- size_t b_count_desired;/* desired transfer size */
- void *b_addr; /* virtual address of buffer */
- struct work_struct b_iodone_work;
- atomic_t b_io_remaining; /* #outstanding I/O requests */
- xfs_buf_iodone_t b_iodone; /* I/O completion function */
- xfs_buf_relse_t b_relse; /* releasing function */
- xfs_buf_bdstrat_t b_strat; /* pre-write function */
- struct semaphore b_iodonesema; /* Semaphore for I/O waiters */
- void *b_fspriv;
- void *b_fspriv2;
- void *b_fspriv3;
- unsigned short b_error; /* error code on I/O */
- unsigned short b_locked; /* page array is locked */
- unsigned int b_page_count; /* size of page array */
- unsigned int b_offset; /* page offset in first page */
- struct page **b_pages; /* array of page pointers */
- struct page *b_page_array[XB_PAGES]; /* inline pages */
-#ifdef XFS_BUF_LOCK_TRACKING
- int b_last_holder;
-#endif
-} xfs_buf_t;
-
-
-/* Finding and Reading Buffers */
-extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t, xfs_buf_t *);
-#define xfs_incore(buftarg,blkno,len,lockit) \
- _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t);
-#define xfs_buf_get(target, blkno, len, flags) \
- xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-
-extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t);
-#define xfs_buf_read(target, blkno, len, flags) \
- xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-
-extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
-extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t);
-
-/* Releasing Buffers */
-extern void xfs_buf_free(xfs_buf_t *);
-extern void xfs_buf_rele(xfs_buf_t *);
-
-/* Locking and Unlocking Buffers */
-extern int xfs_buf_cond_lock(xfs_buf_t *);
-extern int xfs_buf_lock_value(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
-
-/* Buffer Read and Write Routines */
-extern void xfs_buf_ioend(xfs_buf_t *, int);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
-extern int xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
- xfs_buf_rw_t);
-
-static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
-{
- return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
-}
-
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
- return bp ? bp->b_error : ENOMEM;
-}
-
-/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-
-/* Pinning Buffer Storage in Memory */
-extern void xfs_buf_pin(xfs_buf_t *);
-extern void xfs_buf_unpin(xfs_buf_t *);
-extern int xfs_buf_ispin(xfs_buf_t *);
-
-/* Delayed Write Buffer Routines */
-extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-
-/* Buffer Daemon Setup Routines */
-extern int xfs_buf_init(void);
-extern void xfs_buf_terminate(void);
-
-#ifdef XFS_BUF_TRACE
-extern ktrace_t *xfs_buf_trace_buf;
-extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
-#else
-#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
-#endif
-
-#define xfs_buf_target_name(target) \
- ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
-
-
-#define XFS_B_ASYNC XBF_ASYNC
-#define XFS_B_DELWRI XBF_DELWRI
-#define XFS_B_READ XBF_READ
-#define XFS_B_WRITE XBF_WRITE
-#define XFS_B_STALE XBF_STALE
-
-#define XFS_BUF_TRYLOCK XBF_TRYLOCK
-#define XFS_INCORE_TRYLOCK XBF_TRYLOCK
-#define XFS_BUF_LOCK XBF_LOCK
-#define XFS_BUF_MAPPED XBF_MAPPED
-
-#define BUF_BUSY XBF_DONT_BLOCK
-
-#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
-#define XFS_BUF_ZEROFLAGS(bp) \
- ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
-
-#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE)
-#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE)
-#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE)
-#define XFS_BUF_SUPER_STALE(bp) do { \
- XFS_BUF_STALE(bp); \
- xfs_buf_delwri_dequeue(bp); \
- XFS_BUF_DONE(bp); \
- } while (0)
-
-#define XFS_BUF_MANAGE XBF_FS_MANAGED
-#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
-
-#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
-
-#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no)
-#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp)
-#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0)
-
-#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_BUSY(bp) do { } while (0)
-#define XFS_BUF_UNBUSY(bp) do { } while (0)
-#define XFS_BUF_ISBUSY(bp) (1)
-
-#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
-#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
-#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
-
-#define XFS_BUF_SHUT(bp) do { } while (0)
-#define XFS_BUF_UNSHUT(bp) do { } while (0)
-#define XFS_BUF_ISSHUT(bp) (0)
-
-#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
-#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
-
-#define XFS_BUF_ISUNINITIAL(bp) (0)
-#define XFS_BUF_UNUNINITIAL(bp) (0)
-
-#define XFS_BUF_BP_ISMAPPED(bp) (1)
-
-#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
-#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
-#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
-#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
-#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
-
-#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
-#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
-#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)
-#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))
-#define XFS_BUF_SET_START(bp) do { } while (0)
-#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
-
-#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
-#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
-#define XFS_BUF_ADDR(bp) ((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))
-#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))
-#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
-
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)
-#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
-#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
-
-#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)
-
-#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
-#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
-#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
-
-#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
-#define XFS_BUF_TARGET(bp) ((bp)->b_target)
-#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
-
-static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
-{
- bp->b_fspriv3 = mp;
- bp->b_strat = xfs_bdstrat_cb;
- xfs_buf_delwri_dequeue(bp);
- return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
-}
-
-static inline void xfs_buf_relse(xfs_buf_t *bp)
-{
- if (!bp->b_relse)
- xfs_buf_unlock(bp);
- xfs_buf_rele(bp);
-}
-
-#define xfs_bpin(bp) xfs_buf_pin(bp)
-#define xfs_bunpin(bp) xfs_buf_unpin(bp)
-
-#define xfs_buftrace(id, bp) \
- xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
-
-#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
-
-#define xfs_biomove(bp, off, len, data, rw) \
- xfs_buf_iomove((bp), (off), (len), (data), \
- ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
-
-#define xfs_biozero(bp, off, len) \
- xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-
-static inline int XFS_bwrite(xfs_buf_t *bp)
-{
- int iowait = (bp->b_flags & XBF_ASYNC) == 0;
- int error = 0;
-
- if (!iowait)
- bp->b_flags |= _XBF_RUN_QUEUES;
-
- xfs_buf_delwri_dequeue(bp);
- xfs_buf_iostrategy(bp);
- if (iowait) {
- error = xfs_buf_iowait(bp);
- xfs_buf_relse(bp);
- }
- return error;
-}
-
-#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
-
-static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
-{
- bp->b_strat = xfs_bdstrat_cb;
- bp->b_fspriv3 = mp;
- return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
-}
-
-#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
-
-#define xfs_iowait(bp) xfs_buf_iowait(bp)
-
-#define xfs_baread(target, rablkno, ralen) \
- xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
-
-
-/*
- * Handling of buftargs.
- */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
-extern void xfs_free_buftarg(xfs_buftarg_t *, int);
-extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-
-#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
-
-#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
-
-#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
deleted file mode 100644
index e7f3da61c6c..00000000000
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_CRED_H__
-#define __XFS_CRED_H__
-
-#include <linux/capability.h>
-
-/*
- * Credentials
- */
-typedef struct cred {
- /* EMPTY */
-} cred_t;
-
-extern struct cred *sys_cred;
-
-/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
-static __inline int capable_cred(cred_t *cr, int cid)
-{
- return (cr == sys_cred) ? 1 : capable(cid);
-}
-
-#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
deleted file mode 100644
index 80eb249f2fa..00000000000
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_dmapi.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_dir.h"
-#include "xfs_mount.h"
-#include "xfs_export.h"
-
-/*
- * XFS encodes and decodes the fileid portion of NFS filehandles
- * itself instead of letting the generic NFS code do it. This
- * allows filesystems with 64 bit inode numbers to be exported.
- *
- * Note that a side effect is that xfs_vget() won't be passed a
- * zero inode/generation pair under normal circumstances. As
- * however a malicious client could send us such data, the check
- * remains in that code.
- */
-
-STATIC struct dentry *
-linvfs_decode_fh(
- struct super_block *sb,
- __u32 *fh,
- int fh_len,
- int fileid_type,
- int (*acceptable)(
- void *context,
- struct dentry *de),
- void *context)
-{
- xfs_fid2_t ifid;
- xfs_fid2_t pfid;
- void *parent = NULL;
- int is64 = 0;
- __u32 *p = fh;
-
-#if XFS_BIG_INUMS
- is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG);
- fileid_type &= ~XFS_FILEID_TYPE_64FLAG;
-#endif
-
- /*
- * Note that we only accept fileids which are long enough
- * rather than allow the parent generation number to default
- * to zero. XFS considers zero a valid generation number not
- * an invalid/wildcard value. There's little point printk'ing
- * a warning here as we don't have the client information
- * which would make such a warning useful.
- */
- if (fileid_type > 2 ||
- fh_len < xfs_fileid_length((fileid_type == 2), is64))
- return NULL;
-
- p = xfs_fileid_decode_fid2(p, &ifid, is64);
-
- if (fileid_type == 2) {
- p = xfs_fileid_decode_fid2(p, &pfid, is64);
- parent = &pfid;
- }
-
- fh = (__u32 *)&ifid;
- return find_exported_dentry(sb, fh, parent, acceptable, context);
-}
-
-
-STATIC int
-linvfs_encode_fh(
- struct dentry *dentry,
- __u32 *fh,
- int *max_len,
- int connectable)
-{
- struct inode *inode = dentry->d_inode;
- int type = 1;
- __u32 *p = fh;
- int len;
- int is64 = 0;
-#if XFS_BIG_INUMS
- vfs_t *vfs = LINVFS_GET_VFS(inode->i_sb);
-
- if (!(vfs->vfs_flag & VFS_32BITINODES)) {
- /* filesystem may contain 64bit inode numbers */
- is64 = XFS_FILEID_TYPE_64FLAG;
- }
-#endif
-
- /* Directories don't need their parent encoded, they have ".." */
- if (S_ISDIR(inode->i_mode))
- connectable = 0;
-
- /*
- * Only encode if there is enough space given. In practice
- * this means we can't export a filesystem with 64bit inodes
- * over NFSv2 with the subtree_check export option; the other
- * seven combinations work. The real answer is "don't use v2".
- */
- len = xfs_fileid_length(connectable, is64);
- if (*max_len < len)
- return 255;
- *max_len = len;
-
- p = xfs_fileid_encode_inode(p, inode, is64);
- if (connectable) {
- spin_lock(&dentry->d_lock);
- p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64);
- spin_unlock(&dentry->d_lock);
- type = 2;
- }
- BUG_ON((p - fh) != len);
- return type | is64;
-}
-
-STATIC struct dentry *
-linvfs_get_dentry(
- struct super_block *sb,
- void *data)
-{
- vnode_t *vp;
- struct inode *inode;
- struct dentry *result;
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- VFS_VGET(vfsp, &vp, (fid_t *)data, error);
- if (error || vp == NULL)
- return ERR_PTR(-ESTALE) ;
-
- inode = LINVFS_GET_IP(vp);
- result = d_alloc_anon(inode);
- if (!result) {
- iput(inode);
- return ERR_PTR(-ENOMEM);
- }
- return result;
-}
-
-STATIC struct dentry *
-linvfs_get_parent(
- struct dentry *child)
-{
- int error;
- vnode_t *vp, *cvp;
- struct dentry *parent;
- struct dentry dotdot;
-
- dotdot.d_name.name = "..";
- dotdot.d_name.len = 2;
- dotdot.d_inode = NULL;
-
- cvp = NULL;
- vp = LINVFS_GET_VP(child->d_inode);
- VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
- if (unlikely(error))
- return ERR_PTR(-error);
-
- parent = d_alloc_anon(LINVFS_GET_IP(cvp));
- if (unlikely(!parent)) {
- VN_RELE(cvp);
- return ERR_PTR(-ENOMEM);
- }
- return parent;
-}
-
-struct export_operations linvfs_export_ops = {
- .decode_fh = linvfs_decode_fh,
- .encode_fh = linvfs_encode_fh,
- .get_parent = linvfs_get_parent,
- .get_dentry = linvfs_get_dentry,
-};
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
deleted file mode 100644
index ced4404339c..00000000000
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ /dev/null
@@ -1,595 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_trans.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_ioctl32.h"
-
-#include <linux/dcache.h>
-#include <linux/smp_lock.h>
-
-static struct vm_operations_struct linvfs_file_vm_ops;
-#ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct linvfs_dmapi_file_vm_ops;
-#endif
-
-STATIC inline ssize_t
-__linvfs_read(
- struct kiocb *iocb,
- char __user *buf,
- int ioflags,
- size_t count,
- loff_t pos)
-{
- struct iovec iov = {buf, count};
- struct file *file = iocb->ki_filp;
- vnode_t *vp = LINVFS_GET_VP(file->f_dentry->d_inode);
- ssize_t rval;
-
- BUG_ON(iocb->ki_pos != pos);
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
- return rval;
-}
-
-
-STATIC ssize_t
-linvfs_aio_read(
- struct kiocb *iocb,
- char __user *buf,
- size_t count,
- loff_t pos)
-{
- return __linvfs_read(iocb, buf, IO_ISAIO, count, pos);
-}
-
-STATIC ssize_t
-linvfs_aio_read_invis(
- struct kiocb *iocb,
- char __user *buf,
- size_t count,
- loff_t pos)
-{
- return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
-}
-
-
-STATIC inline ssize_t
-__linvfs_write(
- struct kiocb *iocb,
- const char __user *buf,
- int ioflags,
- size_t count,
- loff_t pos)
-{
- struct iovec iov = {(void __user *)buf, count};
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = LINVFS_GET_VP(inode);
- ssize_t rval;
-
- BUG_ON(iocb->ki_pos != pos);
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
- return rval;
-}
-
-
-STATIC ssize_t
-linvfs_aio_write(
- struct kiocb *iocb,
- const char __user *buf,
- size_t count,
- loff_t pos)
-{
- return __linvfs_write(iocb, buf, IO_ISAIO, count, pos);
-}
-
-STATIC ssize_t
-linvfs_aio_write_invis(
- struct kiocb *iocb,
- const char __user *buf,
- size_t count,
- loff_t pos)
-{
- return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
-}
-
-
-STATIC inline ssize_t
-__linvfs_readv(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = LINVFS_GET_VP(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-linvfs_readv(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __linvfs_readv(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-linvfs_readv_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-
-STATIC inline ssize_t
-__linvfs_writev(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = LINVFS_GET_VP(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-
-STATIC ssize_t
-linvfs_writev(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __linvfs_writev(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-linvfs_writev_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-STATIC ssize_t
-linvfs_sendfile(
- struct file *filp,
- loff_t *ppos,
- size_t count,
- read_actor_t actor,
- void *target)
-{
- vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
- ssize_t rval;
-
- VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval);
- return rval;
-}
-
-
-STATIC int
-linvfs_open(
- struct inode *inode,
- struct file *filp)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error;
-
- if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
- return -EFBIG;
-
- ASSERT(vp);
- VOP_OPEN(vp, NULL, error);
- return -error;
-}
-
-
-STATIC int
-linvfs_release(
- struct inode *inode,
- struct file *filp)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error = 0;
-
- if (vp)
- VOP_RELEASE(vp, error);
- return -error;
-}
-
-
-STATIC int
-linvfs_fsync(
- struct file *filp,
- struct dentry *dentry,
- int datasync)
-{
- struct inode *inode = dentry->d_inode;
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error;
- int flags = FSYNC_WAIT;
-
- if (datasync)
- flags |= FSYNC_DATA;
-
- ASSERT(vp);
- VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
- return -error;
-}
-
-/*
- * linvfs_readdir maps to VOP_READDIR().
- * We need to build a uio, cred, ...
- */
-
-#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
-
-#ifdef CONFIG_XFS_DMAPI
-
-STATIC struct page *
-linvfs_filemap_nopage(
- struct vm_area_struct *area,
- unsigned long address,
- int *type)
-{
- struct inode *inode = area->vm_file->f_dentry->d_inode;
- vnode_t *vp = LINVFS_GET_VP(inode);
- xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
- int error;
-
- ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
-
- error = XFS_SEND_MMAP(mp, area, 0);
- if (error)
- return NULL;
-
- return filemap_nopage(area, address, type);
-}
-
-#endif /* CONFIG_XFS_DMAPI */
-
-
-STATIC int
-linvfs_readdir(
- struct file *filp,
- void *dirent,
- filldir_t filldir)
-{
- int error = 0;
- vnode_t *vp;
- uio_t uio;
- iovec_t iov;
- int eof = 0;
- caddr_t read_buf;
- int namelen, size = 0;
- size_t rlen = PAGE_CACHE_SIZE;
- xfs_off_t start_offset, curr_offset;
- xfs_dirent_t *dbp = NULL;
-
- vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
- ASSERT(vp);
-
- /* Try fairly hard to get memory */
- do {
- if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
- break;
- rlen >>= 1;
- } while (rlen >= 1024);
-
- if (read_buf == NULL)
- return -ENOMEM;
-
- uio.uio_iov = &iov;
- uio.uio_segflg = UIO_SYSSPACE;
- curr_offset = filp->f_pos;
- if (filp->f_pos != 0x7fffffff)
- uio.uio_offset = filp->f_pos;
- else
- uio.uio_offset = 0xffffffff;
-
- while (!eof) {
- uio.uio_resid = iov.iov_len = rlen;
- iov.iov_base = read_buf;
- uio.uio_iovcnt = 1;
-
- start_offset = uio.uio_offset;
-
- VOP_READDIR(vp, &uio, NULL, &eof, error);
- if ((uio.uio_offset == start_offset) || error) {
- size = 0;
- break;
- }
-
- size = rlen - uio.uio_resid;
- dbp = (xfs_dirent_t *)read_buf;
- while (size > 0) {
- namelen = strlen(dbp->d_name);
-
- if (filldir(dirent, dbp->d_name, namelen,
- (loff_t) curr_offset & 0x7fffffff,
- (ino_t) dbp->d_ino,
- DT_UNKNOWN)) {
- goto done;
- }
- size -= dbp->d_reclen;
- curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */;
- dbp = nextdp(dbp);
- }
- }
-done:
- if (!error) {
- if (size == 0)
- filp->f_pos = uio.uio_offset & 0x7fffffff;
- else if (dbp)
- filp->f_pos = curr_offset;
- }
-
- kfree(read_buf);
- return -error;
-}
-
-
-STATIC int
-linvfs_file_mmap(
- struct file *filp,
- struct vm_area_struct *vma)
-{
- struct inode *ip = filp->f_dentry->d_inode;
- vnode_t *vp = LINVFS_GET_VP(ip);
- vattr_t va = { .va_mask = XFS_AT_UPDATIME };
- int error;
-
- vma->vm_ops = &linvfs_file_vm_ops;
-
-#ifdef CONFIG_XFS_DMAPI
- if (vp->v_vfsp->vfs_flag & VFS_DMI) {
- vma->vm_ops = &linvfs_dmapi_file_vm_ops;
- }
-#endif /* CONFIG_XFS_DMAPI */
-
- VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
- if (!error)
- vn_revalidate(vp); /* update Linux inode flags */
- return 0;
-}
-
-
-STATIC long
-linvfs_ioctl(
- struct file *filp,
- unsigned int cmd,
- unsigned long arg)
-{
- int error;
- struct inode *inode = filp->f_dentry->d_inode;
- vnode_t *vp = LINVFS_GET_VP(inode);
-
- VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error);
- VMODIFY(vp);
-
- /* NOTE: some of the ioctl's return positive #'s as a
- * byte count indicating success, such as
- * readlink_by_handle. So we don't "sign flip"
- * like most other routines. This means true
- * errors need to be returned as a negative value.
- */
- return error;
-}
-
-STATIC long
-linvfs_ioctl_invis(
- struct file *filp,
- unsigned int cmd,
- unsigned long arg)
-{
- int error;
- struct inode *inode = filp->f_dentry->d_inode;
- vnode_t *vp = LINVFS_GET_VP(inode);
-
- ASSERT(vp);
- VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
- VMODIFY(vp);
-
- /* NOTE: some of the ioctl's return positive #'s as a
- * byte count indicating success, such as
- * readlink_by_handle. So we don't "sign flip"
- * like most other routines. This means true
- * errors need to be returned as a negative value.
- */
- return error;
-}
-
-#ifdef CONFIG_XFS_DMAPI
-#ifdef HAVE_VMOP_MPROTECT
-STATIC int
-linvfs_mprotect(
- struct vm_area_struct *vma,
- unsigned int newflags)
-{
- vnode_t *vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode);
- int error = 0;
-
- if (vp->v_vfsp->vfs_flag & VFS_DMI) {
- if ((vma->vm_flags & VM_MAYSHARE) &&
- (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) {
- xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
-
- error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
- }
- }
- return error;
-}
-#endif /* HAVE_VMOP_MPROTECT */
-#endif /* CONFIG_XFS_DMAPI */
-
-#ifdef HAVE_FOP_OPEN_EXEC
-/* If the user is attempting to execute a file that is offline then
- * we have to trigger a DMAPI READ event before the file is marked as busy
- * otherwise the invisible I/O will not be able to write to the file to bring
- * it back online.
- */
-STATIC int
-linvfs_open_exec(
- struct inode *inode)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
- int error = 0;
- xfs_inode_t *ip;
-
- if (vp->v_vfsp->vfs_flag & VFS_DMI) {
- ip = xfs_vtoi(vp);
- if (!ip) {
- error = -EINVAL;
- goto open_exec_out;
- }
- if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
- error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
- 0, 0, 0, NULL);
- }
- }
-open_exec_out:
- return error;
-}
-#endif /* HAVE_FOP_OPEN_EXEC */
-
-struct file_operations linvfs_file_operations = {
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .readv = linvfs_readv,
- .writev = linvfs_writev,
- .aio_read = linvfs_aio_read,
- .aio_write = linvfs_aio_write,
- .sendfile = linvfs_sendfile,
- .unlocked_ioctl = linvfs_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = linvfs_compat_ioctl,
-#endif
- .mmap = linvfs_file_mmap,
- .open = linvfs_open,
- .release = linvfs_release,
- .fsync = linvfs_fsync,
-#ifdef HAVE_FOP_OPEN_EXEC
- .open_exec = linvfs_open_exec,
-#endif
-};
-
-struct file_operations linvfs_invis_file_operations = {
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .readv = linvfs_readv_invis,
- .writev = linvfs_writev_invis,
- .aio_read = linvfs_aio_read_invis,
- .aio_write = linvfs_aio_write_invis,
- .sendfile = linvfs_sendfile,
- .unlocked_ioctl = linvfs_ioctl_invis,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = linvfs_compat_invis_ioctl,
-#endif
- .mmap = linvfs_file_mmap,
- .open = linvfs_open,
- .release = linvfs_release,
- .fsync = linvfs_fsync,
-};
-
-
-struct file_operations linvfs_dir_operations = {
- .read = generic_read_dir,
- .readdir = linvfs_readdir,
- .unlocked_ioctl = linvfs_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = linvfs_compat_ioctl,
-#endif
- .fsync = linvfs_fsync,
-};
-
-static struct vm_operations_struct linvfs_file_vm_ops = {
- .nopage = filemap_nopage,
- .populate = filemap_populate,
-};
-
-#ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
- .nopage = linvfs_filemap_nopage,
- .populate = filemap_populate,
-#ifdef HAVE_VMOP_MPROTECT
- .mprotect = linvfs_mprotect,
-#endif
-};
-#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
deleted file mode 100644
index 4fa4b1a5187..00000000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "xfs.h"
-
-/*
- * Stub for no-op vnode operations that return error status.
- */
-int
-fs_noerr(void)
-{
- return 0;
-}
-
-/*
- * Operation unsupported under this file system.
- */
-int
-fs_nosys(void)
-{
- return ENOSYS;
-}
-
-/*
- * Stub for inactive, strategy, and read/write lock/unlock. Does nothing.
- */
-/* ARGSUSED */
-void
-fs_noval(void)
-{
-}
-
-/*
- * vnode pcache layer for vnode_tosspages.
- * 'last' parameter unused but left in for IRIX compatibility
- */
-void
-fs_tosspages(
- bhv_desc_t *bdp,
- xfs_off_t first,
- xfs_off_t last,
- int fiopt)
-{
- vnode_t *vp = BHV_TO_VNODE(bdp);
- struct inode *ip = LINVFS_GET_IP(vp);
-
- if (VN_CACHED(vp))
- truncate_inode_pages(ip->i_mapping, first);
-}
-
-
-/*
- * vnode pcache layer for vnode_flushinval_pages.
- * 'last' parameter unused but left in for IRIX compatibility
- */
-void
-fs_flushinval_pages(
- bhv_desc_t *bdp,
- xfs_off_t first,
- xfs_off_t last,
- int fiopt)
-{
- vnode_t *vp = BHV_TO_VNODE(bdp);
- struct inode *ip = LINVFS_GET_IP(vp);
-
- if (VN_CACHED(vp)) {
- filemap_write_and_wait(ip->i_mapping);
-
- truncate_inode_pages(ip->i_mapping, first);
- }
-}
-
-/*
- * vnode pcache layer for vnode_flush_pages.
- * 'last' parameter unused but left in for IRIX compatibility
- */
-int
-fs_flush_pages(
- bhv_desc_t *bdp,
- xfs_off_t first,
- xfs_off_t last,
- uint64_t flags,
- int fiopt)
-{
- vnode_t *vp = BHV_TO_VNODE(bdp);
- struct inode *ip = LINVFS_GET_IP(vp);
-
- if (VN_CACHED(vp)) {
- filemap_fdatawrite(ip->i_mapping);
- if (flags & XFS_B_ASYNC)
- return 0;
- filemap_fdatawait(ip->i_mapping);
- }
-
- return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
deleted file mode 100644
index aee9ccdd18f..00000000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_FS_SUBR_H__
-#define __XFS_FS_SUBR_H__
-
-struct cred;
-extern int fs_noerr(void);
-extern int fs_nosys(void);
-extern void fs_noval(void);
-extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
-extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
-extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
-
-#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
deleted file mode 100644
index e1a22bfcf86..00000000000
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_GLOBALS_H__
-#define __XFS_GLOBALS_H__
-
-extern uint64_t xfs_panic_mask; /* set to cause more panics */
-extern unsigned long xfs_physmem;
-extern struct cred *sys_cred;
-
-#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
deleted file mode 100644
index 4db47790415..00000000000
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ /dev/null
@@ -1,1334 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_bmap.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_dfrag.h"
-#include "xfs_fsops.h"
-
-#include <linux/capability.h>
-#include <linux/dcache.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/pagemap.h>
-
-/*
- * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
- * a file or fs handle.
- *
- * XFS_IOC_PATH_TO_FSHANDLE
- * returns fs handle for a mount point or path within that mount point
- * XFS_IOC_FD_TO_HANDLE
- * returns full handle for a FD opened in user space
- * XFS_IOC_PATH_TO_HANDLE
- * returns full handle for a path
- */
-STATIC int
-xfs_find_handle(
- unsigned int cmd,
- void __user *arg)
-{
- int hsize;
- xfs_handle_t handle;
- xfs_fsop_handlereq_t hreq;
- struct inode *inode;
- struct vnode *vp;
-
- if (copy_from_user(&hreq, arg, sizeof(hreq)))
- return -XFS_ERROR(EFAULT);
-
- memset((char *)&handle, 0, sizeof(handle));
-
- switch (cmd) {
- case XFS_IOC_PATH_TO_FSHANDLE:
- case XFS_IOC_PATH_TO_HANDLE: {
- struct nameidata nd;
- int error;
-
- error = user_path_walk_link((const char __user *)hreq.path, &nd);
- if (error)
- return error;
-
- ASSERT(nd.dentry);
- ASSERT(nd.dentry->d_inode);
- inode = igrab(nd.dentry->d_inode);
- path_release(&nd);
- break;
- }
-
- case XFS_IOC_FD_TO_HANDLE: {
- struct file *file;
-
- file = fget(hreq.fd);
- if (!file)
- return -EBADF;
-
- ASSERT(file->f_dentry);
- ASSERT(file->f_dentry->d_inode);
- inode = igrab(file->f_dentry->d_inode);
- fput(file);
- break;
- }
-
- default:
- ASSERT(0);
- return -XFS_ERROR(EINVAL);
- }
-
- if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
- /* we're not in XFS anymore, Toto */
- iput(inode);
- return -XFS_ERROR(EINVAL);
- }
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
- case S_IFDIR:
- case S_IFLNK:
- break;
- default:
- iput(inode);
- return -XFS_ERROR(EBADF);
- }
-
- /* we need the vnode */
- vp = LINVFS_GET_VP(inode);
-
- /* now we can grab the fsid */
- memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
- hsize = sizeof(xfs_fsid_t);
-
- if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
- xfs_inode_t *ip;
- int lock_mode;
-
- /* need to get access to the xfs_inode to read the generation */
- ip = xfs_vtoi(vp);
- ASSERT(ip);
- lock_mode = xfs_ilock_map_shared(ip);
-
- /* fill in fid section of handle from inode */
- handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
- sizeof(handle.ha_fid.xfs_fid_len);
- handle.ha_fid.xfs_fid_pad = 0;
- handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
- handle.ha_fid.xfs_fid_ino = ip->i_ino;
-
- xfs_iunlock_map_shared(ip, lock_mode);
-
- hsize = XFS_HSIZE(handle);
- }
-
- /* now copy our handle into the user buffer & write out the size */
- if (copy_to_user(hreq.ohandle, &handle, hsize) ||
- copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
- iput(inode);
- return -XFS_ERROR(EFAULT);
- }
-
- iput(inode);
- return 0;
-}
-
-
-/*
- * Convert userspace handle data into vnode (and inode).
- * We [ab]use the fact that all the fsop_handlereq ioctl calls
- * have a data structure argument whose first component is always
- * a xfs_fsop_handlereq_t, so we can cast to and from this type.
- * This allows us to optimise the copy_from_user calls and gives
- * a handy, shared routine.
- *
- * If no error, caller must always VN_RELE the returned vp.
- */
-STATIC int
-xfs_vget_fsop_handlereq(
- xfs_mount_t *mp,
- struct inode *parinode, /* parent inode pointer */
- xfs_fsop_handlereq_t *hreq,
- vnode_t **vp,
- struct inode **inode)
-{
- void __user *hanp;
- size_t hlen;
- xfs_fid_t *xfid;
- xfs_handle_t *handlep;
- xfs_handle_t handle;
- xfs_inode_t *ip;
- struct inode *inodep;
- vnode_t *vpp;
- xfs_ino_t ino;
- __u32 igen;
- int error;
-
- /*
- * Only allow handle opens under a directory.
- */
- if (!S_ISDIR(parinode->i_mode))
- return XFS_ERROR(ENOTDIR);
-
- hanp = hreq->ihandle;
- hlen = hreq->ihandlen;
- handlep = &handle;
-
- if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
- return XFS_ERROR(EINVAL);
- if (copy_from_user(handlep, hanp, hlen))
- return XFS_ERROR(EFAULT);
- if (hlen < sizeof(*handlep))
- memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
- if (hlen > sizeof(handlep->ha_fsid)) {
- if (handlep->ha_fid.xfs_fid_len !=
- (hlen - sizeof(handlep->ha_fsid)
- - sizeof(handlep->ha_fid.xfs_fid_len))
- || handlep->ha_fid.xfs_fid_pad)
- return XFS_ERROR(EINVAL);
- }
-
- /*
- * Crack the handle, obtain the inode # & generation #
- */
- xfid = (struct xfs_fid *)&handlep->ha_fid;
- if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
- ino = xfid->xfs_fid_ino;
- igen = xfid->xfs_fid_gen;
- } else {
- return XFS_ERROR(EINVAL);
- }
-
- /*
- * Get the XFS inode, building a vnode to go with it.
- */
- error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
- if (error)
- return error;
- if (ip == NULL)
- return XFS_ERROR(EIO);
- if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
- xfs_iput_new(ip, XFS_ILOCK_SHARED);
- return XFS_ERROR(ENOENT);
- }
-
- vpp = XFS_ITOV(ip);
- inodep = LINVFS_GET_IP(vpp);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- *vp = vpp;
- *inode = inodep;
- return 0;
-}
-
-STATIC int
-xfs_open_by_handle(
- xfs_mount_t *mp,
- void __user *arg,
- struct file *parfilp,
- struct inode *parinode)
-{
- int error;
- int new_fd;
- int permflag;
- struct file *filp;
- struct inode *inode;
- struct dentry *dentry;
- vnode_t *vp;
- xfs_fsop_handlereq_t hreq;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
- if (error)
- return -error;
-
- /* Restrict xfs_open_by_handle to directories & regular files. */
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
- iput(inode);
- return -XFS_ERROR(EINVAL);
- }
-
-#if BITS_PER_LONG != 32
- hreq.oflags |= O_LARGEFILE;
-#endif
- /* Put open permission in namei format. */
- permflag = hreq.oflags;
- if ((permflag+1) & O_ACCMODE)
- permflag++;
- if (permflag & O_TRUNC)
- permflag |= 2;
-
- if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
- (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
- iput(inode);
- return -XFS_ERROR(EPERM);
- }
-
- if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
- iput(inode);
- return -XFS_ERROR(EACCES);
- }
-
- /* Can't write directories. */
- if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
- iput(inode);
- return -XFS_ERROR(EISDIR);
- }
-
- if ((new_fd = get_unused_fd()) < 0) {
- iput(inode);
- return new_fd;
- }
-
- dentry = d_alloc_anon(inode);
- if (dentry == NULL) {
- iput(inode);
- put_unused_fd(new_fd);
- return -XFS_ERROR(ENOMEM);
- }
-
- /* Ensure umount returns EBUSY on umounts while this file is open. */
- mntget(parfilp->f_vfsmnt);
-
- /* Create file pointer. */
- filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags);
- if (IS_ERR(filp)) {
- put_unused_fd(new_fd);
- return -XFS_ERROR(-PTR_ERR(filp));
- }
- if (inode->i_mode & S_IFREG)
- filp->f_op = &linvfs_invis_file_operations;
-
- fd_install(new_fd, filp);
- return new_fd;
-}
-
-STATIC int
-xfs_readlink_by_handle(
- xfs_mount_t *mp,
- void __user *arg,
- struct file *parfilp,
- struct inode *parinode)
-{
- int error;
- struct iovec aiov;
- struct uio auio;
- struct inode *inode;
- xfs_fsop_handlereq_t hreq;
- vnode_t *vp;
- __u32 olen;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
- if (error)
- return -error;
-
- /* Restrict this handle operation to symlinks only. */
- if (!S_ISLNK(inode->i_mode)) {
- VN_RELE(vp);
- return -XFS_ERROR(EINVAL);
- }
-
- if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) {
- VN_RELE(vp);
- return -XFS_ERROR(EFAULT);
- }
- aiov.iov_len = olen;
- aiov.iov_base = hreq.ohandle;
-
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = 0;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_resid = olen;
-
- VOP_READLINK(vp, &auio, IO_INVIS, NULL, error);
-
- VN_RELE(vp);
- return (olen - auio.uio_resid);
-}
-
-STATIC int
-xfs_fssetdm_by_handle(
- xfs_mount_t *mp,
- void __user *arg,
- struct file *parfilp,
- struct inode *parinode)
-{
- int error;
- struct fsdmidata fsd;
- xfs_fsop_setdm_handlereq_t dmhreq;
- struct inode *inode;
- bhv_desc_t *bdp;
- vnode_t *vp;
-
- if (!capable(CAP_MKNOD))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode);
- if (error)
- return -error;
-
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
- VN_RELE(vp);
- return -XFS_ERROR(EPERM);
- }
-
- if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
- VN_RELE(vp);
- return -XFS_ERROR(EFAULT);
- }
-
- bdp = bhv_base_unlocked(VN_BHV_HEAD(vp));
- error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL);
-
- VN_RELE(vp);
- if (error)
- return -error;
- return 0;
-}
-
-STATIC int
-xfs_attrlist_by_handle(
- xfs_mount_t *mp,
- void __user *arg,
- struct file *parfilp,
- struct inode *parinode)
-{
- int error;
- attrlist_cursor_kern_t *cursor;
- xfs_fsop_attrlist_handlereq_t al_hreq;
- struct inode *inode;
- vnode_t *vp;
- char *kbuf;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
- return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
- return -XFS_ERROR(EINVAL);
-
- error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq,
- &vp, &inode);
- if (error)
- goto out;
-
- kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
- if (!kbuf)
- goto out_vn_rele;
-
- cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
- VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags,
- cursor, NULL, error);
- if (error)
- goto out_kfree;
-
- if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
- error = -EFAULT;
-
- out_kfree:
- kfree(kbuf);
- out_vn_rele:
- VN_RELE(vp);
- out:
- return -error;
-}
-
-STATIC int
-xfs_attrmulti_attr_get(
- struct vnode *vp,
- char *name,
- char __user *ubuf,
- __uint32_t *len,
- __uint32_t flags)
-{
- char *kbuf;
- int error = EFAULT;
-
- if (*len > XATTR_SIZE_MAX)
- return EINVAL;
- kbuf = kmalloc(*len, GFP_KERNEL);
- if (!kbuf)
- return ENOMEM;
-
- VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error);
- if (error)
- goto out_kfree;
-
- if (copy_to_user(ubuf, kbuf, *len))
- error = EFAULT;
-
- out_kfree:
- kfree(kbuf);
- return error;
-}
-
-STATIC int
-xfs_attrmulti_attr_set(
- struct vnode *vp,
- char *name,
- const char __user *ubuf,
- __uint32_t len,
- __uint32_t flags)
-{
- char *kbuf;
- int error = EFAULT;
-
- if (IS_RDONLY(&vp->v_inode))
- return -EROFS;
- if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
- return EPERM;
- if (len > XATTR_SIZE_MAX)
- return EINVAL;
-
- kbuf = kmalloc(len, GFP_KERNEL);
- if (!kbuf)
- return ENOMEM;
-
- if (copy_from_user(kbuf, ubuf, len))
- goto out_kfree;
-
- VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error);
-
- out_kfree:
- kfree(kbuf);
- return error;
-}
-
-STATIC int
-xfs_attrmulti_attr_remove(
- struct vnode *vp,
- char *name,
- __uint32_t flags)
-{
- int error;
-
-
- if (IS_RDONLY(&vp->v_inode))
- return -EROFS;
- if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
- return EPERM;
-
- VOP_ATTR_REMOVE(vp, name, flags, NULL, error);
- return error;
-}
-
-STATIC int
-xfs_attrmulti_by_handle(
- xfs_mount_t *mp,
- void __user *arg,
- struct file *parfilp,
- struct inode *parinode)
-{
- int error;
- xfs_attr_multiop_t *ops;
- xfs_fsop_attrmulti_handlereq_t am_hreq;
- struct inode *inode;
- vnode_t *vp;
- unsigned int i, size;
- char *attr_name;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode);
- if (error)
- goto out;
-
- error = E2BIG;
- size = am_hreq.opcount * sizeof(attr_multiop_t);
- if (!size || size > 16 * PAGE_SIZE)
- goto out_vn_rele;
-
- error = ENOMEM;
- ops = kmalloc(size, GFP_KERNEL);
- if (!ops)
- goto out_vn_rele;
-
- error = EFAULT;
- if (copy_from_user(ops, am_hreq.ops, size))
- goto out_kfree_ops;
-
- attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
- if (!attr_name)
- goto out_kfree_ops;
-
-
- error = 0;
- for (i = 0; i < am_hreq.opcount; i++) {
- ops[i].am_error = strncpy_from_user(attr_name,
- ops[i].am_attrname, MAXNAMELEN);
- if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
- error = -ERANGE;
- if (ops[i].am_error < 0)
- break;
-
- switch (ops[i].am_opcode) {
- case ATTR_OP_GET:
- ops[i].am_error = xfs_attrmulti_attr_get(vp,
- attr_name, ops[i].am_attrvalue,
- &ops[i].am_length, ops[i].am_flags);
- break;
- case ATTR_OP_SET:
- ops[i].am_error = xfs_attrmulti_attr_set(vp,
- attr_name, ops[i].am_attrvalue,
- ops[i].am_length, ops[i].am_flags);
- break;
- case ATTR_OP_REMOVE:
- ops[i].am_error = xfs_attrmulti_attr_remove(vp,
- attr_name, ops[i].am_flags);
- break;
- default:
- ops[i].am_error = EINVAL;
- }
- }
-
- if (copy_to_user(am_hreq.ops, ops, size))
- error = XFS_ERROR(EFAULT);
-
- kfree(attr_name);
- out_kfree_ops:
- kfree(ops);
- out_vn_rele:
- VN_RELE(vp);
- out:
- return -error;
-}
-
-/* prototypes for a few of the stack-hungry cases that have
- * their own functions. Functions are defined after their use
- * so gcc doesn't get fancy and inline them with -03 */
-
-STATIC int
-xfs_ioc_space(
- bhv_desc_t *bdp,
- vnode_t *vp,
- struct file *filp,
- int flags,
- unsigned int cmd,
- void __user *arg);
-
-STATIC int
-xfs_ioc_bulkstat(
- xfs_mount_t *mp,
- unsigned int cmd,
- void __user *arg);
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
- xfs_mount_t *mp,
- void __user *arg);
-
-STATIC int
-xfs_ioc_fsgeometry(
- xfs_mount_t *mp,
- void __user *arg);
-
-STATIC int
-xfs_ioc_xattr(
- vnode_t *vp,
- xfs_inode_t *ip,
- struct file *filp,
- unsigned int cmd,
- void __user *arg);
-
-STATIC int
-xfs_ioc_getbmap(
- bhv_desc_t *bdp,
- struct file *filp,
- int flags,
- unsigned int cmd,
- void __user *arg);
-
-STATIC int
-xfs_ioc_getbmapx(
- bhv_desc_t *bdp,
- void __user *arg);
-
-int
-xfs_ioctl(
- bhv_desc_t *bdp,
- struct inode *inode,
- struct file *filp,
- int ioflags,
- unsigned int cmd,
- void __user *arg)
-{
- int error;
- vnode_t *vp;
- xfs_inode_t *ip;
- xfs_mount_t *mp;
-
- vp = LINVFS_GET_VP(inode);
-
- vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address);
-
- ip = XFS_BHVTOI(bdp);
- mp = ip->i_mount;
-
- switch (cmd) {
-
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_FREESP:
- case XFS_IOC_RESVSP:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- /*
- * Only allow the sys admin to reserve space unless
- * unwritten extents are enabled.
- */
- if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg);
-
- case XFS_IOC_DIOINFO: {
- struct dioattr da;
- xfs_buftarg_t *target =
- (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
- da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
- if (copy_to_user(arg, &da, sizeof(da)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_FSBULKSTAT_SINGLE:
- case XFS_IOC_FSBULKSTAT:
- case XFS_IOC_FSINUMBERS:
- return xfs_ioc_bulkstat(mp, cmd, arg);
-
- case XFS_IOC_FSGEOMETRY_V1:
- return xfs_ioc_fsgeometry_v1(mp, arg);
-
- case XFS_IOC_FSGEOMETRY:
- return xfs_ioc_fsgeometry(mp, arg);
-
- case XFS_IOC_GETVERSION:
- case XFS_IOC_GETXFLAGS:
- case XFS_IOC_SETXFLAGS:
- case XFS_IOC_FSGETXATTR:
- case XFS_IOC_FSSETXATTR:
- case XFS_IOC_FSGETXATTRA:
- return xfs_ioc_xattr(vp, ip, filp, cmd, arg);
-
- case XFS_IOC_FSSETDM: {
- struct fsdmidata dmi;
-
- if (copy_from_user(&dmi, arg, sizeof(dmi)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate,
- NULL);
- return -error;
- }
-
- case XFS_IOC_GETBMAP:
- case XFS_IOC_GETBMAPA:
- return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg);
-
- case XFS_IOC_GETBMAPX:
- return xfs_ioc_getbmapx(bdp, arg);
-
- case XFS_IOC_FD_TO_HANDLE:
- case XFS_IOC_PATH_TO_HANDLE:
- case XFS_IOC_PATH_TO_FSHANDLE:
- return xfs_find_handle(cmd, arg);
-
- case XFS_IOC_OPEN_BY_HANDLE:
- return xfs_open_by_handle(mp, arg, filp, inode);
-
- case XFS_IOC_FSSETDM_BY_HANDLE:
- return xfs_fssetdm_by_handle(mp, arg, filp, inode);
-
- case XFS_IOC_READLINK_BY_HANDLE:
- return xfs_readlink_by_handle(mp, arg, filp, inode);
-
- case XFS_IOC_ATTRLIST_BY_HANDLE:
- return xfs_attrlist_by_handle(mp, arg, filp, inode);
-
- case XFS_IOC_ATTRMULTI_BY_HANDLE:
- return xfs_attrmulti_by_handle(mp, arg, filp, inode);
-
- case XFS_IOC_SWAPEXT: {
- error = xfs_swapext((struct xfs_swapext __user *)arg);
- return -error;
- }
-
- case XFS_IOC_FSCOUNTS: {
- xfs_fsop_counts_t out;
-
- error = xfs_fs_counts(mp, &out);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_SET_RESBLKS: {
- xfs_fsop_resblks_t inout;
- __uint64_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&inout, arg, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
-
- /* input parameter is passed in resblks field of structure */
- in = inout.resblks;
- error = xfs_reserve_blocks(mp, &in, &inout);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &inout, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_GET_RESBLKS: {
- xfs_fsop_resblks_t out;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- error = xfs_reserve_blocks(mp, NULL, &out);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
-
- return 0;
- }
-
- case XFS_IOC_FSGROWFSDATA: {
- xfs_growfs_data_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_growfs_data(mp, &in);
- return -error;
- }
-
- case XFS_IOC_FSGROWFSLOG: {
- xfs_growfs_log_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_growfs_log(mp, &in);
- return -error;
- }
-
- case XFS_IOC_FSGROWFSRT: {
- xfs_growfs_rt_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_growfs_rt(mp, &in);
- return -error;
- }
-
- case XFS_IOC_FREEZE:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (inode->i_sb->s_frozen == SB_UNFROZEN)
- freeze_bdev(inode->i_sb->s_bdev);
- return 0;
-
- case XFS_IOC_THAW:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (inode->i_sb->s_frozen != SB_UNFROZEN)
- thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
- return 0;
-
- case XFS_IOC_GOINGDOWN: {
- __uint32_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (get_user(in, (__uint32_t __user *)arg))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_fs_goingdown(mp, in);
- return -error;
- }
-
- case XFS_IOC_ERROR_INJECTION: {
- xfs_error_injection_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_errortag_add(in.errtag, mp);
- return -error;
- }
-
- case XFS_IOC_ERROR_CLEARALL:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- error = xfs_errortag_clearall(mp);
- return -error;
-
- default:
- return -ENOTTY;
- }
-}
-
-STATIC int
-xfs_ioc_space(
- bhv_desc_t *bdp,
- vnode_t *vp,
- struct file *filp,
- int ioflags,
- unsigned int cmd,
- void __user *arg)
-{
- xfs_flock64_t bf;
- int attr_flags = 0;
- int error;
-
- if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
- return -XFS_ERROR(EPERM);
-
- if (!(filp->f_mode & FMODE_WRITE))
- return -XFS_ERROR(EBADF);
-
- if (!VN_ISREG(vp))
- return -XFS_ERROR(EINVAL);
-
- if (copy_from_user(&bf, arg, sizeof(bf)))
- return -XFS_ERROR(EFAULT);
-
- if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
- if (ioflags & IO_INVIS)
- attr_flags |= ATTR_DMI;
-
- error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos,
- NULL, attr_flags);
- return -error;
-}
-
-STATIC int
-xfs_ioc_bulkstat(
- xfs_mount_t *mp,
- unsigned int cmd,
- void __user *arg)
-{
- xfs_fsop_bulkreq_t bulkreq;
- int count; /* # of records returned */
- xfs_ino_t inlast; /* last inode number */
- int done;
- int error;
-
- /* done = 1 if there are more stats to get and if bulkstat */
- /* should be called again (unused here, but used in dmapi) */
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
- return -XFS_ERROR(EFAULT);
-
- if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
- return -XFS_ERROR(EFAULT);
-
- if ((count = bulkreq.icount) <= 0)
- return -XFS_ERROR(EINVAL);
-
- if (cmd == XFS_IOC_FSINUMBERS)
- error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer);
- else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
- error = xfs_bulkstat_single(mp, &inlast,
- bulkreq.ubuffer, &done);
- else { /* XFS_IOC_FSBULKSTAT */
- if (count == 1 && inlast != 0) {
- inlast++;
- error = xfs_bulkstat_single(mp, &inlast,
- bulkreq.ubuffer, &done);
- } else {
- error = xfs_bulkstat(mp, &inlast, &count,
- (bulkstat_one_pf)xfs_bulkstat_one, NULL,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
- }
- }
-
- if (error)
- return -error;
-
- if (bulkreq.ocount != NULL) {
- if (copy_to_user(bulkreq.lastip, &inlast,
- sizeof(xfs_ino_t)))
- return -XFS_ERROR(EFAULT);
-
- if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -XFS_ERROR(EFAULT);
- }
-
- return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
- xfs_mount_t *mp,
- void __user *arg)
-{
- xfs_fsop_geom_v1_t fsgeo;
- int error;
-
- error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry(
- xfs_mount_t *mp,
- void __user *arg)
-{
- xfs_fsop_geom_t fsgeo;
- int error;
-
- error = xfs_fs_geometry(mp, &fsgeo, 4);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-/*
- * Linux extended inode flags interface.
- */
-#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */
-#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */
-#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */
-#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
-#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
-
-STATIC unsigned int
-xfs_merge_ioc_xflags(
- unsigned int flags,
- unsigned int start)
-{
- unsigned int xflags = start;
-
- if (flags & LINUX_XFLAG_IMMUTABLE)
- xflags |= XFS_XFLAG_IMMUTABLE;
- else
- xflags &= ~XFS_XFLAG_IMMUTABLE;
- if (flags & LINUX_XFLAG_APPEND)
- xflags |= XFS_XFLAG_APPEND;
- else
- xflags &= ~XFS_XFLAG_APPEND;
- if (flags & LINUX_XFLAG_SYNC)
- xflags |= XFS_XFLAG_SYNC;
- else
- xflags &= ~XFS_XFLAG_SYNC;
- if (flags & LINUX_XFLAG_NOATIME)
- xflags |= XFS_XFLAG_NOATIME;
- else
- xflags &= ~XFS_XFLAG_NOATIME;
- if (flags & LINUX_XFLAG_NODUMP)
- xflags |= XFS_XFLAG_NODUMP;
- else
- xflags &= ~XFS_XFLAG_NODUMP;
-
- return xflags;
-}
-
-STATIC unsigned int
-xfs_di2lxflags(
- __uint16_t di_flags)
-{
- unsigned int flags = 0;
-
- if (di_flags & XFS_DIFLAG_IMMUTABLE)
- flags |= LINUX_XFLAG_IMMUTABLE;
- if (di_flags & XFS_DIFLAG_APPEND)
- flags |= LINUX_XFLAG_APPEND;
- if (di_flags & XFS_DIFLAG_SYNC)
- flags |= LINUX_XFLAG_SYNC;
- if (di_flags & XFS_DIFLAG_NOATIME)
- flags |= LINUX_XFLAG_NOATIME;
- if (di_flags & XFS_DIFLAG_NODUMP)
- flags |= LINUX_XFLAG_NODUMP;
- return flags;
-}
-
-STATIC int
-xfs_ioc_xattr(
- vnode_t *vp,
- xfs_inode_t *ip,
- struct file *filp,
- unsigned int cmd,
- void __user *arg)
-{
- struct fsxattr fa;
- vattr_t va;
- int error;
- int attr_flags;
- unsigned int flags;
-
- switch (cmd) {
- case XFS_IOC_FSGETXATTR: {
- va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
- XFS_AT_NEXTENTS | XFS_AT_PROJID;
- VOP_GETATTR(vp, &va, 0, NULL, error);
- if (error)
- return -error;
-
- fa.fsx_xflags = va.va_xflags;
- fa.fsx_extsize = va.va_extsize;
- fa.fsx_nextents = va.va_nextents;
- fa.fsx_projid = va.va_projid;
-
- if (copy_to_user(arg, &fa, sizeof(fa)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_FSSETXATTR: {
- if (copy_from_user(&fa, arg, sizeof(fa)))
- return -XFS_ERROR(EFAULT);
-
- attr_flags = 0;
- if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
-
- va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
- va.va_xflags = fa.fsx_xflags;
- va.va_extsize = fa.fsx_extsize;
- va.va_projid = fa.fsx_projid;
-
- VOP_SETATTR(vp, &va, attr_flags, NULL, error);
- if (!error)
- vn_revalidate(vp); /* update Linux inode flags */
- return -error;
- }
-
- case XFS_IOC_FSGETXATTRA: {
- va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
- XFS_AT_ANEXTENTS | XFS_AT_PROJID;
- VOP_GETATTR(vp, &va, 0, NULL, error);
- if (error)
- return -error;
-
- fa.fsx_xflags = va.va_xflags;
- fa.fsx_extsize = va.va_extsize;
- fa.fsx_nextents = va.va_anextents;
- fa.fsx_projid = va.va_projid;
-
- if (copy_to_user(arg, &fa, sizeof(fa)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_GETXFLAGS: {
- flags = xfs_di2lxflags(ip->i_d.di_flags);
- if (copy_to_user(arg, &flags, sizeof(flags)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_SETXFLAGS: {
- if (copy_from_user(&flags, arg, sizeof(flags)))
- return -XFS_ERROR(EFAULT);
-
- if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \
- LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \
- LINUX_XFLAG_SYNC))
- return -XFS_ERROR(EOPNOTSUPP);
-
- attr_flags = 0;
- if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
-
- va.va_mask = XFS_AT_XFLAGS;
- va.va_xflags = xfs_merge_ioc_xflags(flags,
- xfs_ip2xflags(ip));
-
- VOP_SETATTR(vp, &va, attr_flags, NULL, error);
- if (!error)
- vn_revalidate(vp); /* update Linux inode flags */
- return -error;
- }
-
- case XFS_IOC_GETVERSION: {
- flags = LINVFS_GET_IP(vp)->i_generation;
- if (copy_to_user(arg, &flags, sizeof(flags)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- default:
- return -ENOTTY;
- }
-}
-
-STATIC int
-xfs_ioc_getbmap(
- bhv_desc_t *bdp,
- struct file *filp,
- int ioflags,
- unsigned int cmd,
- void __user *arg)
-{
- struct getbmap bm;
- int iflags;
- int error;
-
- if (copy_from_user(&bm, arg, sizeof(bm)))
- return -XFS_ERROR(EFAULT);
-
- if (bm.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
-
- iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
- if (ioflags & IO_INVIS)
- iflags |= BMV_IF_NO_DMAPI_READ;
-
- error = xfs_getbmap(bdp, &bm, (struct getbmap __user *)arg+1, iflags);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &bm, sizeof(bm)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
- bhv_desc_t *bdp,
- void __user *arg)
-{
- struct getbmapx bmx;
- struct getbmap bm;
- int iflags;
- int error;
-
- if (copy_from_user(&bmx, arg, sizeof(bmx)))
- return -XFS_ERROR(EFAULT);
-
- if (bmx.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
-
- /*
- * Map input getbmapx structure to a getbmap
- * structure for xfs_getbmap.
- */
- GETBMAP_CONVERT(bmx, bm);
-
- iflags = bmx.bmv_iflags;
-
- if (iflags & (~BMV_IF_VALID))
- return -XFS_ERROR(EINVAL);
-
- iflags |= BMV_IF_EXTENDED;
-
- error = xfs_getbmap(bdp, &bm, (struct getbmapx __user *)arg+1, iflags);
- if (error)
- return -error;
-
- GETBMAP_CONVERT(bm, bmx);
-
- if (copy_to_user(arg, &bmx, sizeof(bmx)))
- return -XFS_ERROR(EFAULT);
-
- return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
deleted file mode 100644
index a7c9ba1a9f7..00000000000
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <linux/config.h>
-#include <linux/compat.h>
-#include <linux/init.h>
-#include <linux/ioctl.h>
-#include <linux/syscalls.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <asm/uaccess.h>
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_fs.h"
-#include "xfs_vfs.h"
-#include "xfs_vnode.h"
-#include "xfs_dfrag.h"
-
-#define _NATIVE_IOC(cmd, type) \
- _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
-#define BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct xfs_flock64_32 {
- __s16 l_type;
- __s16 l_whence;
- __s64 l_start __attribute__((packed));
- /* len == 0 means until end of file */
- __s64 l_len __attribute__((packed));
- __s32 l_sysid;
- __u32 l_pid;
- __s32 l_pad[4]; /* reserve area */
-} xfs_flock64_32_t;
-
-#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32)
-#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32)
-#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32)
-#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32)
-#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32)
-#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32)
-#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32)
-#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32)
-
-/* just account for different alignment */
-STATIC unsigned long
-xfs_ioctl32_flock(
- unsigned long arg)
-{
- xfs_flock64_32_t __user *p32 = (void __user *)arg;
- xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p));
-
- if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
- copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) ||
- copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) ||
- copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) ||
- copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) ||
- copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
- copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
- return -EFAULT;
-
- return (unsigned long)p;
-}
-
-#else
-
-typedef struct xfs_fsop_bulkreq32 {
- compat_uptr_t lastip; /* last inode # pointer */
- __s32 icount; /* count of entries in buffer */
- compat_uptr_t ubuffer; /* user buffer for inode desc. */
- __s32 ocount; /* output count pointer */
-} xfs_fsop_bulkreq32_t;
-
-STATIC unsigned long
-xfs_ioctl32_bulkstat(
- unsigned long arg)
-{
- xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg;
- xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p));
- u32 addr;
-
- if (get_user(addr, &p32->lastip) ||
- put_user(compat_ptr(addr), &p->lastip) ||
- copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
- get_user(addr, &p32->ubuffer) ||
- put_user(compat_ptr(addr), &p->ubuffer) ||
- get_user(addr, &p32->ocount) ||
- put_user(compat_ptr(addr), &p->ocount))
- return -EFAULT;
-
- return (unsigned long)p;
-}
-#endif
-
-STATIC long
-__linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
-{
- int error;
- struct inode *inode = f->f_dentry->d_inode;
- vnode_t *vp = LINVFS_GET_VP(inode);
-
- switch (cmd) {
- case XFS_IOC_DIOINFO:
- case XFS_IOC_FSGEOMETRY_V1:
- case XFS_IOC_FSGEOMETRY:
- case XFS_IOC_GETVERSION:
- case XFS_IOC_GETXFLAGS:
- case XFS_IOC_SETXFLAGS:
- case XFS_IOC_FSGETXATTR:
- case XFS_IOC_FSSETXATTR:
- case XFS_IOC_FSGETXATTRA:
- case XFS_IOC_FSSETDM:
- case XFS_IOC_GETBMAP:
- case XFS_IOC_GETBMAPA:
- case XFS_IOC_GETBMAPX:
-/* not handled
- case XFS_IOC_FD_TO_HANDLE:
- case XFS_IOC_PATH_TO_HANDLE:
- case XFS_IOC_PATH_TO_FSHANDLE:
- case XFS_IOC_OPEN_BY_HANDLE:
- case XFS_IOC_FSSETDM_BY_HANDLE:
- case XFS_IOC_READLINK_BY_HANDLE:
- case XFS_IOC_ATTRLIST_BY_HANDLE:
- case XFS_IOC_ATTRMULTI_BY_HANDLE:
-*/
- case XFS_IOC_FSCOUNTS:
- case XFS_IOC_SET_RESBLKS:
- case XFS_IOC_GET_RESBLKS:
- case XFS_IOC_FSGROWFSDATA:
- case XFS_IOC_FSGROWFSLOG:
- case XFS_IOC_FSGROWFSRT:
- case XFS_IOC_FREEZE:
- case XFS_IOC_THAW:
- case XFS_IOC_GOINGDOWN:
- case XFS_IOC_ERROR_INJECTION:
- case XFS_IOC_ERROR_CLEARALL:
- break;
-
-#ifdef BROKEN_X86_ALIGNMENT
- /* xfs_flock_t has wrong u32 vs u64 alignment */
- case XFS_IOC_ALLOCSP_32:
- case XFS_IOC_FREESP_32:
- case XFS_IOC_ALLOCSP64_32:
- case XFS_IOC_FREESP64_32:
- case XFS_IOC_RESVSP_32:
- case XFS_IOC_UNRESVSP_32:
- case XFS_IOC_RESVSP64_32:
- case XFS_IOC_UNRESVSP64_32:
- arg = xfs_ioctl32_flock(arg);
- cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
- break;
-
-#else /* These are handled fine if no alignment issues */
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_FREESP:
- case XFS_IOC_RESVSP:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- break;
-
- /* xfs_bstat_t still has wrong u32 vs u64 alignment */
- case XFS_IOC_SWAPEXT:
- break;
-
- case XFS_IOC_FSBULKSTAT_SINGLE:
- case XFS_IOC_FSBULKSTAT:
- case XFS_IOC_FSINUMBERS:
- arg = xfs_ioctl32_bulkstat(arg);
- break;
-#endif
- default:
- return -ENOIOCTLCMD;
- }
-
- VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
- VMODIFY(vp);
-
- return error;
-}
-
-long
-linvfs_compat_ioctl(
- struct file *f,
- unsigned cmd,
- unsigned long arg)
-{
- return __linvfs_compat_ioctl(0, f, cmd, arg);
-}
-
-long
-linvfs_compat_invis_ioctl(
- struct file *f,
- unsigned cmd,
- unsigned long arg)
-{
- return __linvfs_compat_ioctl(IO_INVIS, f, cmd, arg);
-}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
deleted file mode 100644
index eda7919b70a..00000000000
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ /dev/null
@@ -1,858 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-
-#include <linux/capability.h>
-#include <linux/xattr.h>
-#include <linux/namei.h>
-#include <linux/security.h>
-
-/*
- * Get a XFS inode from a given vnode.
- */
-xfs_inode_t *
-xfs_vtoi(
- struct vnode *vp)
-{
- bhv_desc_t *bdp;
-
- bdp = bhv_lookup_range(VN_BHV_HEAD(vp),
- VNODE_POSITION_XFS, VNODE_POSITION_XFS);
- if (unlikely(bdp == NULL))
- return NULL;
- return XFS_BHVTOI(bdp);
-}
-
-/*
- * Bring the atime in the XFS inode uptodate.
- * Used before logging the inode to disk or when the Linux inode goes away.
- */
-void
-xfs_synchronize_atime(
- xfs_inode_t *ip)
-{
- vnode_t *vp;
-
- vp = XFS_ITOV_NULL(ip);
- if (vp) {
- struct inode *inode = &vp->v_inode;
- ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
- }
-}
-
-/*
- * Change the requested timestamp in the given inode.
- * We don't lock across timestamp updates, and we don't log them but
- * we do record the fact that there is dirty information in core.
- *
- * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
- * with XFS_ICHGTIME_ACC to be sure that access time
- * update will take. Calling first with XFS_ICHGTIME_ACC
- * and then XFS_ICHGTIME_MOD may fail to modify the access
- * timestamp if the filesystem is mounted noacctm.
- */
-void
-xfs_ichgtime(
- xfs_inode_t *ip,
- int flags)
-{
- struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
- timespec_t tv;
-
- nanotime(&tv);
- if (flags & XFS_ICHGTIME_MOD) {
- inode->i_mtime = tv;
- ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
- }
- if (flags & XFS_ICHGTIME_ACC) {
- inode->i_atime = tv;
- ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
- }
- if (flags & XFS_ICHGTIME_CHG) {
- inode->i_ctime = tv;
- ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
- ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
- }
-
- /*
- * We update the i_update_core field _after_ changing
- * the timestamps in order to coordinate properly with
- * xfs_iflush() so that we don't lose timestamp updates.
- * This keeps us from having to hold the inode lock
- * while doing this. We use the SYNCHRONIZE macro to
- * ensure that the compiler does not reorder the update
- * of i_update_core above the timestamp updates above.
- */
- SYNCHRONIZE();
- ip->i_update_core = 1;
- if (!(inode->i_state & I_LOCK))
- mark_inode_dirty_sync(inode);
-}
-
-/*
- * Variant on the above which avoids querying the system clock
- * in situations where we know the Linux inode timestamps have
- * just been updated (and so we can update our inode cheaply).
- */
-void
-xfs_ichgtime_fast(
- xfs_inode_t *ip,
- struct inode *inode,
- int flags)
-{
- timespec_t *tvp;
-
- /*
- * Atime updates for read() & friends are handled lazily now, and
- * explicit updates must go through xfs_ichgtime()
- */
- ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
-
- /*
- * We're not supposed to change timestamps in readonly-mounted
- * filesystems. Throw it away if anyone asks us.
- */
- if (unlikely(IS_RDONLY(inode)))
- return;
-
- if (flags & XFS_ICHGTIME_MOD) {
- tvp = &inode->i_mtime;
- ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
- }
- if (flags & XFS_ICHGTIME_CHG) {
- tvp = &inode->i_ctime;
- ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
- ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec;
- }
-
- /*
- * We update the i_update_core field _after_ changing
- * the timestamps in order to coordinate properly with
- * xfs_iflush() so that we don't lose timestamp updates.
- * This keeps us from having to hold the inode lock
- * while doing this. We use the SYNCHRONIZE macro to
- * ensure that the compiler does not reorder the update
- * of i_update_core above the timestamp updates above.
- */
- SYNCHRONIZE();
- ip->i_update_core = 1;
- if (!(inode->i_state & I_LOCK))
- mark_inode_dirty_sync(inode);
-}
-
-
-/*
- * Pull the link count and size up from the xfs inode to the linux inode
- */
-STATIC void
-validate_fields(
- struct inode *ip)
-{
- vnode_t *vp = LINVFS_GET_VP(ip);
- vattr_t va;
- int error;
-
- va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
- VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error);
- if (likely(!error)) {
- ip->i_nlink = va.va_nlink;
- ip->i_blocks = va.va_nblocks;
-
- /* we're under i_mutex so i_size can't change under us */
- if (i_size_read(ip) != va.va_size)
- i_size_write(ip, va.va_size);
- }
-}
-
-/*
- * Hook in SELinux. This is not quite correct yet, what we really need
- * here (as we do for default ACLs) is a mechanism by which creation of
- * these attrs can be journalled at inode creation time (along with the
- * inode, of course, such that log replay can't cause these to be lost).
- */
-STATIC int
-linvfs_init_security(
- struct vnode *vp,
- struct inode *dir)
-{
- struct inode *ip = LINVFS_GET_IP(vp);
- size_t length;
- void *value;
- char *name;
- int error;
-
- error = security_inode_init_security(ip, dir, &name, &value, &length);
- if (error) {
- if (error == -EOPNOTSUPP)
- return 0;
- return -error;
- }
-
- VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
- if (!error)
- VMODIFY(vp);
-
- kfree(name);
- kfree(value);
- return error;
-}
-
-/*
- * Determine whether a process has a valid fs_struct (kernel daemons
- * like knfsd don't have an fs_struct).
- *
- * XXX(hch): nfsd is broken, better fix it instead.
- */
-STATIC inline int
-has_fs_struct(struct task_struct *task)
-{
- return (task->fs != init_task.fs);
-}
-
-STATIC inline void
-cleanup_inode(
- vnode_t *dvp,
- vnode_t *vp,
- struct dentry *dentry,
- int mode)
-{
- struct dentry teardown = {};
- int err2;
-
- /* Oh, the horror.
- * If we can't add the ACL or we fail in
- * linvfs_init_security we must back out.
- * ENOSPC can hit here, among other things.
- */
- teardown.d_inode = LINVFS_GET_IP(vp);
- teardown.d_name = dentry->d_name;
-
- if (S_ISDIR(mode))
- VOP_RMDIR(dvp, &teardown, NULL, err2);
- else
- VOP_REMOVE(dvp, &teardown, NULL, err2);
- VN_RELE(vp);
-}
-
-STATIC int
-linvfs_mknod(
- struct inode *dir,
- struct dentry *dentry,
- int mode,
- dev_t rdev)
-{
- struct inode *ip;
- vattr_t va;
- vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir);
- xfs_acl_t *default_acl = NULL;
- attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS;
- int error;
-
- /*
- * Irix uses Missed'em'V split, but doesn't want to see
- * the upper 5 bits of (14bit) major.
- */
- if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)
- return -EINVAL;
-
- if (test_default_acl && test_default_acl(dvp)) {
- if (!_ACL_ALLOC(default_acl))
- return -ENOMEM;
- if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
- _ACL_FREE(default_acl);
- default_acl = NULL;
- }
- }
-
- if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current))
- mode &= ~current->fs->umask;
-
- memset(&va, 0, sizeof(va));
- va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
- va.va_mode = mode;
-
- switch (mode & S_IFMT) {
- case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
- va.va_rdev = sysv_encode_dev(rdev);
- va.va_mask |= XFS_AT_RDEV;
- /*FALLTHROUGH*/
- case S_IFREG:
- VOP_CREATE(dvp, dentry, &va, &vp, NULL, error);
- break;
- case S_IFDIR:
- VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error);
- break;
- default:
- error = EINVAL;
- break;
- }
-
- if (!error)
- {
- error = linvfs_init_security(vp, dir);
- if (error)
- cleanup_inode(dvp, vp, dentry, mode);
- }
-
- if (default_acl) {
- if (!error) {
- error = _ACL_INHERIT(vp, &va, default_acl);
- if (!error)
- VMODIFY(vp);
- else
- cleanup_inode(dvp, vp, dentry, mode);
- }
- _ACL_FREE(default_acl);
- }
-
- if (!error) {
- ASSERT(vp);
- ip = LINVFS_GET_IP(vp);
-
- if (S_ISCHR(mode) || S_ISBLK(mode))
- ip->i_rdev = rdev;
- else if (S_ISDIR(mode))
- validate_fields(ip);
- d_instantiate(dentry, ip);
- validate_fields(dir);
- }
- return -error;
-}
-
-STATIC int
-linvfs_create(
- struct inode *dir,
- struct dentry *dentry,
- int mode,
- struct nameidata *nd)
-{
- return linvfs_mknod(dir, dentry, mode, 0);
-}
-
-STATIC int
-linvfs_mkdir(
- struct inode *dir,
- struct dentry *dentry,
- int mode)
-{
- return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0);
-}
-
-STATIC struct dentry *
-linvfs_lookup(
- struct inode *dir,
- struct dentry *dentry,
- struct nameidata *nd)
-{
- struct vnode *vp = LINVFS_GET_VP(dir), *cvp;
- int error;
-
- if (dentry->d_name.len >= MAXNAMELEN)
- return ERR_PTR(-ENAMETOOLONG);
-
- VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
- if (error) {
- if (unlikely(error != ENOENT))
- return ERR_PTR(-error);
- d_add(dentry, NULL);
- return NULL;
- }
-
- return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
-}
-
-STATIC int
-linvfs_link(
- struct dentry *old_dentry,
- struct inode *dir,
- struct dentry *dentry)
-{
- struct inode *ip; /* inode of guy being linked to */
- vnode_t *tdvp; /* target directory for new name/link */
- vnode_t *vp; /* vp of name being linked */
- int error;
-
- ip = old_dentry->d_inode; /* inode being linked to */
- if (S_ISDIR(ip->i_mode))
- return -EPERM;
-
- tdvp = LINVFS_GET_VP(dir);
- vp = LINVFS_GET_VP(ip);
-
- VOP_LINK(tdvp, vp, dentry, NULL, error);
- if (!error) {
- VMODIFY(tdvp);
- VN_HOLD(vp);
- validate_fields(ip);
- d_instantiate(dentry, ip);
- }
- return -error;
-}
-
-STATIC int
-linvfs_unlink(
- struct inode *dir,
- struct dentry *dentry)
-{
- struct inode *inode;
- vnode_t *dvp; /* directory containing name to remove */
- int error;
-
- inode = dentry->d_inode;
- dvp = LINVFS_GET_VP(dir);
-
- VOP_REMOVE(dvp, dentry, NULL, error);
- if (!error) {
- validate_fields(dir); /* For size only */
- validate_fields(inode);
- }
-
- return -error;
-}
-
-STATIC int
-linvfs_symlink(
- struct inode *dir,
- struct dentry *dentry,
- const char *symname)
-{
- struct inode *ip;
- vattr_t va;
- vnode_t *dvp; /* directory containing name of symlink */
- vnode_t *cvp; /* used to lookup symlink to put in dentry */
- int error;
-
- dvp = LINVFS_GET_VP(dir);
- cvp = NULL;
-
- memset(&va, 0, sizeof(va));
- va.va_mode = S_IFLNK |
- (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
- va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
-
- error = 0;
- VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
- if (likely(!error && cvp)) {
- error = linvfs_init_security(cvp, dir);
- if (likely(!error)) {
- ip = LINVFS_GET_IP(cvp);
- d_instantiate(dentry, ip);
- validate_fields(dir);
- validate_fields(ip);
- }
- }
- return -error;
-}
-
-STATIC int
-linvfs_rmdir(
- struct inode *dir,
- struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- vnode_t *dvp = LINVFS_GET_VP(dir);
- int error;
-
- VOP_RMDIR(dvp, dentry, NULL, error);
- if (!error) {
- validate_fields(inode);
- validate_fields(dir);
- }
- return -error;
-}
-
-STATIC int
-linvfs_rename(
- struct inode *odir,
- struct dentry *odentry,
- struct inode *ndir,
- struct dentry *ndentry)
-{
- struct inode *new_inode = ndentry->d_inode;
- vnode_t *fvp; /* from directory */
- vnode_t *tvp; /* target directory */
- int error;
-
- fvp = LINVFS_GET_VP(odir);
- tvp = LINVFS_GET_VP(ndir);
-
- VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
- if (error)
- return -error;
-
- if (new_inode)
- validate_fields(new_inode);
-
- validate_fields(odir);
- if (ndir != odir)
- validate_fields(ndir);
- return 0;
-}
-
-/*
- * careful here - this function can get called recursively, so
- * we need to be very careful about how much stack we use.
- * uio is kmalloced for this reason...
- */
-STATIC void *
-linvfs_follow_link(
- struct dentry *dentry,
- struct nameidata *nd)
-{
- vnode_t *vp;
- uio_t *uio;
- iovec_t iov;
- int error;
- char *link;
-
- ASSERT(dentry);
- ASSERT(nd);
-
- link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
- if (!link) {
- nd_set_link(nd, ERR_PTR(-ENOMEM));
- return NULL;
- }
-
- uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
- if (!uio) {
- kfree(link);
- nd_set_link(nd, ERR_PTR(-ENOMEM));
- return NULL;
- }
-
- vp = LINVFS_GET_VP(dentry->d_inode);
-
- iov.iov_base = link;
- iov.iov_len = MAXPATHLEN;
-
- uio->uio_iov = &iov;
- uio->uio_offset = 0;
- uio->uio_segflg = UIO_SYSSPACE;
- uio->uio_resid = MAXPATHLEN;
- uio->uio_iovcnt = 1;
-
- VOP_READLINK(vp, uio, 0, NULL, error);
- if (error) {
- kfree(link);
- link = ERR_PTR(-error);
- } else {
- link[MAXPATHLEN - uio->uio_resid] = '\0';
- }
- kfree(uio);
-
- nd_set_link(nd, link);
- return NULL;
-}
-
-STATIC void
-linvfs_put_link(
- struct dentry *dentry,
- struct nameidata *nd,
- void *p)
-{
- char *s = nd_get_link(nd);
-
- if (!IS_ERR(s))
- kfree(s);
-}
-
-#ifdef CONFIG_XFS_POSIX_ACL
-STATIC int
-linvfs_permission(
- struct inode *inode,
- int mode,
- struct nameidata *nd)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error;
-
- mode <<= 6; /* convert from linux to vnode access bits */
- VOP_ACCESS(vp, mode, NULL, error);
- return -error;
-}
-#else
-#define linvfs_permission NULL
-#endif
-
-STATIC int
-linvfs_getattr(
- struct vfsmount *mnt,
- struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode = dentry->d_inode;
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error = 0;
-
- if (unlikely(vp->v_flag & VMODIFIED))
- error = vn_revalidate(vp);
- if (!error)
- generic_fillattr(inode, stat);
- return 0;
-}
-
-STATIC int
-linvfs_setattr(
- struct dentry *dentry,
- struct iattr *attr)
-{
- struct inode *inode = dentry->d_inode;
- unsigned int ia_valid = attr->ia_valid;
- vnode_t *vp = LINVFS_GET_VP(inode);
- vattr_t vattr;
- int flags = 0;
- int error;
-
- memset(&vattr, 0, sizeof(vattr_t));
- if (ia_valid & ATTR_UID) {
- vattr.va_mask |= XFS_AT_UID;
- vattr.va_uid = attr->ia_uid;
- }
- if (ia_valid & ATTR_GID) {
- vattr.va_mask |= XFS_AT_GID;
- vattr.va_gid = attr->ia_gid;
- }
- if (ia_valid & ATTR_SIZE) {
- vattr.va_mask |= XFS_AT_SIZE;
- vattr.va_size = attr->ia_size;
- }
- if (ia_valid & ATTR_ATIME) {
- vattr.va_mask |= XFS_AT_ATIME;
- vattr.va_atime = attr->ia_atime;
- }
- if (ia_valid & ATTR_MTIME) {
- vattr.va_mask |= XFS_AT_MTIME;
- vattr.va_mtime = attr->ia_mtime;
- }
- if (ia_valid & ATTR_CTIME) {
- vattr.va_mask |= XFS_AT_CTIME;
- vattr.va_ctime = attr->ia_ctime;
- }
- if (ia_valid & ATTR_MODE) {
- vattr.va_mask |= XFS_AT_MODE;
- vattr.va_mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- inode->i_mode &= ~S_ISGID;
- }
-
- if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
- flags |= ATTR_UTIME;
-#ifdef ATTR_NO_BLOCK
- if ((ia_valid & ATTR_NO_BLOCK))
- flags |= ATTR_NONBLOCK;
-#endif
-
- VOP_SETATTR(vp, &vattr, flags, NULL, error);
- if (error)
- return -error;
- vn_revalidate(vp);
- return error;
-}
-
-STATIC void
-linvfs_truncate(
- struct inode *inode)
-{
- block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block);
-}
-
-STATIC int
-linvfs_setxattr(
- struct dentry *dentry,
- const char *name,
- const void *data,
- size_t size,
- int flags)
-{
- vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- int error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (flags & XATTR_CREATE)
- xflags |= ATTR_CREATE;
- if (flags & XATTR_REPLACE)
- xflags |= ATTR_REPLACE;
- xflags |= namesp->attr_flag;
- return namesp->attr_set(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-linvfs_getxattr(
- struct dentry *dentry,
- const char *name,
- void *data,
- size_t size)
-{
- vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- ssize_t error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (!size) {
- xflags |= ATTR_KERNOVAL;
- data = NULL;
- }
- xflags |= namesp->attr_flag;
- return namesp->attr_get(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-linvfs_listxattr(
- struct dentry *dentry,
- char *data,
- size_t size)
-{
- vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
- int error, xflags = ATTR_KERNAMELS;
- ssize_t result;
-
- if (!size)
- xflags |= ATTR_KERNOVAL;
- xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
-
- error = attr_generic_list(vp, data, size, xflags, &result);
- if (error < 0)
- return error;
- return result;
-}
-
-STATIC int
-linvfs_removexattr(
- struct dentry *dentry,
- const char *name)
-{
- vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- int error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
- xflags |= namesp->attr_flag;
- return namesp->attr_remove(vp, attr, xflags);
-}
-
-
-struct inode_operations linvfs_file_inode_operations = {
- .permission = linvfs_permission,
- .truncate = linvfs_truncate,
- .getattr = linvfs_getattr,
- .setattr = linvfs_setattr,
- .setxattr = linvfs_setxattr,
- .getxattr = linvfs_getxattr,
- .listxattr = linvfs_listxattr,
- .removexattr = linvfs_removexattr,
-};
-
-struct inode_operations linvfs_dir_inode_operations = {
- .create = linvfs_create,
- .lookup = linvfs_lookup,
- .link = linvfs_link,
- .unlink = linvfs_unlink,
- .symlink = linvfs_symlink,
- .mkdir = linvfs_mkdir,
- .rmdir = linvfs_rmdir,
- .mknod = linvfs_mknod,
- .rename = linvfs_rename,
- .permission = linvfs_permission,
- .getattr = linvfs_getattr,
- .setattr = linvfs_setattr,
- .setxattr = linvfs_setxattr,
- .getxattr = linvfs_getxattr,
- .listxattr = linvfs_listxattr,
- .removexattr = linvfs_removexattr,
-};
-
-struct inode_operations linvfs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = linvfs_follow_link,
- .put_link = linvfs_put_link,
- .permission = linvfs_permission,
- .getattr = linvfs_getattr,
- .setattr = linvfs_setattr,
- .setxattr = linvfs_setxattr,
- .getxattr = linvfs_getxattr,
- .listxattr = linvfs_listxattr,
- .removexattr = linvfs_removexattr,
-};
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
deleted file mode 100644
index e0ab45fbfeb..00000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ /dev/null
@@ -1,1021 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_inode_item.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_iomap.h"
-
-#include <linux/capability.h>
-#include <linux/writeback.h>
-
-
-#if defined(XFS_RW_TRACE)
-void
-xfs_rw_enter_trace(
- int tag,
- xfs_iocore_t *io,
- void *data,
- size_t segs,
- loff_t offset,
- int ioflags)
-{
- xfs_inode_t *ip = XFS_IO_INODE(io);
-
- if (ip->i_rwtrace == NULL)
- return;
- ktrace_enter(ip->i_rwtrace,
- (void *)(unsigned long)tag,
- (void *)ip,
- (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
- (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
- (void *)data,
- (void *)((unsigned long)segs),
- (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
- (void *)((unsigned long)(offset & 0xffffffff)),
- (void *)((unsigned long)ioflags),
- (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
- (void *)((unsigned long)(io->io_new_size & 0xffffffff)),
- (void *)NULL,
- (void *)NULL,
- (void *)NULL,
- (void *)NULL,
- (void *)NULL);
-}
-
-void
-xfs_inval_cached_trace(
- xfs_iocore_t *io,
- xfs_off_t offset,
- xfs_off_t len,
- xfs_off_t first,
- xfs_off_t last)
-{
- xfs_inode_t *ip = XFS_IO_INODE(io);
-
- if (ip->i_rwtrace == NULL)
- return;
- ktrace_enter(ip->i_rwtrace,
- (void *)(__psint_t)XFS_INVAL_CACHED,
- (void *)ip,
- (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
- (void *)((unsigned long)(offset & 0xffffffff)),
- (void *)((unsigned long)((len >> 32) & 0xffffffff)),
- (void *)((unsigned long)(len & 0xffffffff)),
- (void *)((unsigned long)((first >> 32) & 0xffffffff)),
- (void *)((unsigned long)(first & 0xffffffff)),
- (void *)((unsigned long)((last >> 32) & 0xffffffff)),
- (void *)((unsigned long)(last & 0xffffffff)),
- (void *)NULL,
- (void *)NULL,
- (void *)NULL,
- (void *)NULL,
- (void *)NULL,
- (void *)NULL);
-}
-#endif
-
-/*
- * xfs_iozero
- *
- * xfs_iozero clears the specified range of buffer supplied,
- * and marks all the affected blocks as valid and modified. If
- * an affected block is not allocated, it will be allocated. If
- * an affected block is not completely overwritten, and is not
- * valid before the operation, it will be read from disk before
- * being partially zeroed.
- */
-STATIC int
-xfs_iozero(
- struct inode *ip, /* inode */
- loff_t pos, /* offset in file */
- size_t count, /* size of data to zero */
- loff_t end_size) /* max file size to set */
-{
- unsigned bytes;
- struct page *page;
- struct address_space *mapping;
- char *kaddr;
- int status;
-
- mapping = ip->i_mapping;
- do {
- unsigned long index, offset;
-
- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
- index = pos >> PAGE_CACHE_SHIFT;
- bytes = PAGE_CACHE_SIZE - offset;
- if (bytes > count)
- bytes = count;
-
- status = -ENOMEM;
- page = grab_cache_page(mapping, index);
- if (!page)
- break;
-
- kaddr = kmap(page);
- status = mapping->a_ops->prepare_write(NULL, page, offset,
- offset + bytes);
- if (status) {
- goto unlock;
- }
-
- memset((void *) (kaddr + offset), 0, bytes);
- flush_dcache_page(page);
- status = mapping->a_ops->commit_write(NULL, page, offset,
- offset + bytes);
- if (!status) {
- pos += bytes;
- count -= bytes;
- if (pos > i_size_read(ip))
- i_size_write(ip, pos < end_size ? pos : end_size);
- }
-
-unlock:
- kunmap(page);
- unlock_page(page);
- page_cache_release(page);
- if (status)
- break;
- } while (count);
-
- return (-status);
-}
-
-ssize_t /* bytes read, or (-) error */
-xfs_read(
- bhv_desc_t *bdp,
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned int segs,
- loff_t *offset,
- int ioflags,
- cred_t *credp)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- size_t size = 0;
- ssize_t ret;
- xfs_fsize_t n;
- xfs_inode_t *ip;
- xfs_mount_t *mp;
- vnode_t *vp;
- unsigned long seg;
-
- ip = XFS_BHVTOI(bdp);
- vp = BHV_TO_VNODE(bdp);
- mp = ip->i_mount;
-
- XFS_STATS_INC(xs_read_calls);
-
- /* START copy & waste from filemap.c */
- for (seg = 0; seg < segs; seg++) {
- const struct iovec *iv = &iovp[seg];
-
- /*
- * If any segment has a negative length, or the cumulative
- * length ever wraps negative then return -EINVAL.
- */
- size += iv->iov_len;
- if (unlikely((ssize_t)(size|iv->iov_len) < 0))
- return XFS_ERROR(-EINVAL);
- }
- /* END copy & waste from filemap.c */
-
- if (unlikely(ioflags & IO_ISDIRECT)) {
- xfs_buftarg_t *target =
- (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((*offset & target->bt_smask) ||
- (size & target->bt_smask)) {
- if (*offset == ip->i_d.di_size) {
- return (0);
- }
- return -XFS_ERROR(EINVAL);
- }
- }
-
- n = XFS_MAXIOFFSET(mp) - *offset;
- if ((n <= 0) || (size == 0))
- return 0;
-
- if (n < size)
- size = n;
-
- if (XFS_FORCED_SHUTDOWN(mp)) {
- return -EIO;
- }
-
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_lock(&inode->i_mutex);
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
- if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
- !(ioflags & IO_INVIS)) {
- vrwlock_t locktype = VRWLOCK_READ;
- int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
-
- ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
- BHV_TO_VNODE(bdp), *offset, size,
- dmflags, &locktype);
- if (ret) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- goto unlock_isem;
- }
- }
-
- xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
- (void *)iovp, segs, *offset, ioflags);
- ret = __generic_file_aio_read(iocb, iovp, segs, offset);
- if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
- ret = wait_on_sync_kiocb(iocb);
- if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
-
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-unlock_isem:
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
-ssize_t
-xfs_sendfile(
- bhv_desc_t *bdp,
- struct file *filp,
- loff_t *offset,
- int ioflags,
- size_t count,
- read_actor_t actor,
- void *target,
- cred_t *credp)
-{
- ssize_t ret;
- xfs_fsize_t n;
- xfs_inode_t *ip;
- xfs_mount_t *mp;
- vnode_t *vp;
-
- ip = XFS_BHVTOI(bdp);
- vp = BHV_TO_VNODE(bdp);
- mp = ip->i_mount;
-
- XFS_STATS_INC(xs_read_calls);
-
- n = XFS_MAXIOFFSET(mp) - *offset;
- if ((n <= 0) || (count == 0))
- return 0;
-
- if (n < count)
- count = n;
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
- if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
- (!(ioflags & IO_INVIS))) {
- vrwlock_t locktype = VRWLOCK_READ;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count,
- FILP_DELAY_FLAG(filp), &locktype);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return -error;
- }
- }
- xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
- (void *)(unsigned long)target, count, *offset, ioflags);
- ret = generic_file_sendfile(filp, offset, count, actor, target);
-
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
- if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
-
- return ret;
-}
-
-/*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF. We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
- */
-STATIC int /* error (positive) */
-xfs_zero_last_block(
- struct inode *ip,
- xfs_iocore_t *io,
- xfs_fsize_t isize,
- xfs_fsize_t end_size)
-{
- xfs_fileoff_t last_fsb;
- xfs_mount_t *mp;
- int nimaps;
- int zero_offset;
- int zero_len;
- int error = 0;
- xfs_bmbt_irec_t imap;
- loff_t loff;
-
- ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
-
- mp = io->io_mount;
-
- zero_offset = XFS_B_FSB_OFFSET(mp, isize);
- if (zero_offset == 0) {
- /*
- * There are no extra bytes in the last block on disk to
- * zero, so return.
- */
- return 0;
- }
-
- last_fsb = XFS_B_TO_FSBT(mp, isize);
- nimaps = 1;
- error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
- &nimaps, NULL);
- if (error) {
- return error;
- }
- ASSERT(nimaps > 0);
- /*
- * If the block underlying isize is just a hole, then there
- * is nothing to zero.
- */
- if (imap.br_startblock == HOLESTARTBLOCK) {
- return 0;
- }
- /*
- * Zero the part of the last block beyond the EOF, and write it
- * out sync. We need to drop the ilock while we do this so we
- * don't deadlock when the buffer cache calls back to us.
- */
- XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
- loff = XFS_FSB_TO_B(mp, last_fsb);
-
- zero_len = mp->m_sb.sb_blocksize - zero_offset;
-
- error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
-
- XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
- ASSERT(error >= 0);
- return error;
-}
-
-/*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF. This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file. This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated. If fill is set,
- * then any holes in the range are filled and zeroed. If not, the holes
- * are left alone as holes.
- */
-
-int /* error (positive) */
-xfs_zero_eof(
- vnode_t *vp,
- xfs_iocore_t *io,
- xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize, /* current inode size */
- xfs_fsize_t end_size) /* terminal inode size */
-{
- struct inode *ip = LINVFS_GET_IP(vp);
- xfs_fileoff_t start_zero_fsb;
- xfs_fileoff_t end_zero_fsb;
- xfs_fileoff_t zero_count_fsb;
- xfs_fileoff_t last_fsb;
- xfs_extlen_t buf_len_fsb;
- xfs_mount_t *mp;
- int nimaps;
- int error = 0;
- xfs_bmbt_irec_t imap;
-
- ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
- ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
- ASSERT(offset > isize);
-
- mp = io->io_mount;
-
- /*
- * First handle zeroing the block on which isize resides.
- * We only zero a part of that block so it is handled specially.
- */
- error = xfs_zero_last_block(ip, io, isize, end_size);
- if (error) {
- ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
- ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
- return error;
- }
-
- /*
- * Calculate the range between the new size and the old
- * where blocks needing to be zeroed may exist. To get the
- * block where the last byte in the file currently resides,
- * we need to subtract one from the size and truncate back
- * to a block boundary. We subtract 1 in case the size is
- * exactly on a block boundary.
- */
- last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
- start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
- end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
- ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
- if (last_fsb == end_zero_fsb) {
- /*
- * The size was only incremented on its last block.
- * We took care of that above, so just return.
- */
- return 0;
- }
-
- ASSERT(start_zero_fsb <= end_zero_fsb);
- while (start_zero_fsb <= end_zero_fsb) {
- nimaps = 1;
- zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
- error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
- 0, NULL, 0, &imap, &nimaps, NULL);
- if (error) {
- ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
- ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
- return error;
- }
- ASSERT(nimaps > 0);
-
- if (imap.br_state == XFS_EXT_UNWRITTEN ||
- imap.br_startblock == HOLESTARTBLOCK) {
- /*
- * This loop handles initializing pages that were
- * partially initialized by the code below this
- * loop. It basically zeroes the part of the page
- * that sits on a hole and sets the page as P_HOLE
- * and calls remapf if it is a mapped file.
- */
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- continue;
- }
-
- /*
- * There are blocks in the range requested.
- * Zero them a single write at a time. We actually
- * don't zero the entire range returned if it is
- * too big and simply loop around to get the rest.
- * That is not the most efficient thing to do, but it
- * is simple and this path should not be exercised often.
- */
- buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
- mp->m_writeio_blocks << 8);
- /*
- * Drop the inode lock while we're doing the I/O.
- * We'll still have the iolock to protect us.
- */
- XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
-
- error = xfs_iozero(ip,
- XFS_FSB_TO_B(mp, start_zero_fsb),
- XFS_FSB_TO_B(mp, buf_len_fsb),
- end_size);
-
- if (error) {
- goto out_lock;
- }
-
- start_zero_fsb = imap.br_startoff + buf_len_fsb;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
- XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
- }
-
- return 0;
-
-out_lock:
-
- XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
- ASSERT(error >= 0);
- return error;
-}
-
-ssize_t /* bytes written, or (-) error */
-xfs_write(
- bhv_desc_t *bdp,
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned int nsegs,
- loff_t *offset,
- int ioflags,
- cred_t *credp)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- unsigned long segs = nsegs;
- xfs_inode_t *xip;
- xfs_mount_t *mp;
- ssize_t ret = 0, error = 0;
- xfs_fsize_t isize, new_size;
- xfs_iocore_t *io;
- vnode_t *vp;
- unsigned long seg;
- int iolock;
- int eventsent = 0;
- vrwlock_t locktype;
- size_t ocount = 0, count;
- loff_t pos;
- int need_isem = 1, need_flush = 0;
-
- XFS_STATS_INC(xs_write_calls);
-
- vp = BHV_TO_VNODE(bdp);
- xip = XFS_BHVTOI(bdp);
-
- for (seg = 0; seg < segs; seg++) {
- const struct iovec *iv = &iovp[seg];
-
- /*
- * If any segment has a negative length, or the cumulative
- * length ever wraps negative then return -EINVAL.
- */
- ocount += iv->iov_len;
- if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
- return -EINVAL;
- if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
- continue;
- if (seg == 0)
- return -EFAULT;
- segs = seg;
- ocount -= iv->iov_len; /* This segment is no good */
- break;
- }
-
- count = ocount;
- pos = *offset;
-
- if (count == 0)
- return 0;
-
- io = &xip->i_iocore;
- mp = io->io_mount;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);
-
- if (ioflags & IO_ISDIRECT) {
- xfs_buftarg_t *target =
- (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- if ((pos & target->bt_smask) || (count & target->bt_smask))
- return XFS_ERROR(-EINVAL);
-
- if (!VN_CACHED(vp) && pos < i_size_read(inode))
- need_isem = 0;
-
- if (VN_CACHED(vp))
- need_flush = 1;
- }
-
-relock:
- if (need_isem) {
- iolock = XFS_IOLOCK_EXCL;
- locktype = VRWLOCK_WRITE;
-
- mutex_lock(&inode->i_mutex);
- } else {
- iolock = XFS_IOLOCK_SHARED;
- locktype = VRWLOCK_WRITE_DIRECT;
- }
-
- xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
-
- isize = i_size_read(inode);
-
- if (file->f_flags & O_APPEND)
- *offset = isize;
-
-start:
- error = -generic_write_checks(file, &pos, &count,
- S_ISBLK(inode->i_mode));
- if (error) {
- xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
- goto out_unlock_isem;
- }
-
- new_size = pos + count;
- if (new_size > isize)
- io->io_new_size = new_size;
-
- if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
- !(ioflags & IO_INVIS) && !eventsent)) {
- loff_t savedsize = pos;
- int dmflags = FILP_DELAY_FLAG(file);
-
- if (need_isem)
- dmflags |= DM_FLAGS_IMUX;
-
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
- error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
- pos, count,
- dmflags, &locktype);
- if (error) {
- xfs_iunlock(xip, iolock);
- goto out_unlock_isem;
- }
- xfs_ilock(xip, XFS_ILOCK_EXCL);
- eventsent = 1;
-
- /*
- * The iolock was dropped and reaquired in XFS_SEND_DATA
- * so we have to recheck the size when appending.
- * We will only "goto start;" once, since having sent the
- * event prevents another call to XFS_SEND_DATA, which is
- * what allows the size to change in the first place.
- */
- if ((file->f_flags & O_APPEND) && savedsize != isize) {
- pos = isize = xip->i_d.di_size;
- goto start;
- }
- }
-
- if (likely(!(ioflags & IO_INVIS))) {
- file_update_time(file);
- xfs_ichgtime_fast(xip, inode,
- XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- }
-
- /*
- * If the offset is beyond the size of the file, we have a couple
- * of things to do. First, if there is already space allocated
- * we need to either create holes or zero the disk or ...
- *
- * If there is a page where the previous size lands, we need
- * to zero it out up to the new size.
- */
-
- if (pos > isize) {
- error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
- isize, pos + count);
- if (error) {
- xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
- goto out_unlock_isem;
- }
- }
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
-
- /*
- * If we're writing the file then make sure to clear the
- * setuid and setgid bits if the process is not being run
- * by root. This keeps people from modifying setuid and
- * setgid binaries.
- */
-
- if (((xip->i_d.di_mode & S_ISUID) ||
- ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
- (S_ISGID | S_IXGRP))) &&
- !capable(CAP_FSETID)) {
- error = xfs_write_clear_setuid(xip);
- if (likely(!error))
- error = -remove_suid(file->f_dentry);
- if (unlikely(error)) {
- xfs_iunlock(xip, iolock);
- goto out_unlock_isem;
- }
- }
-
-retry:
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = mapping->backing_dev_info;
-
- if ((ioflags & IO_ISDIRECT)) {
- if (need_flush) {
- xfs_inval_cached_trace(io, pos, -1,
- ctooff(offtoct(pos)), -1);
- VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
- -1, FI_REMAPF_LOCKED);
- }
-
- if (need_isem) {
- /* demote the lock now the cached pages are gone */
- XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
- mutex_unlock(&inode->i_mutex);
-
- iolock = XFS_IOLOCK_SHARED;
- locktype = VRWLOCK_WRITE_DIRECT;
- need_isem = 0;
- }
-
- xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
- *offset, ioflags);
- ret = generic_file_direct_write(iocb, iovp,
- &segs, pos, offset, count, ocount);
-
- /*
- * direct-io write to a hole: fall through to buffered I/O
- * for completing the rest of the request.
- */
- if (ret >= 0 && ret != count) {
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- pos += ret;
- count -= ret;
-
- need_isem = 1;
- ioflags &= ~IO_ISDIRECT;
- xfs_iunlock(xip, iolock);
- goto relock;
- }
- } else {
- xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
- *offset, ioflags);
- ret = generic_file_buffered_write(iocb, iovp, segs,
- pos, offset, count, ret);
- }
-
- current->backing_dev_info = NULL;
-
- if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
- ret = wait_on_sync_kiocb(iocb);
-
- if ((ret == -ENOSPC) &&
- DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
- !(ioflags & IO_INVIS)) {
-
- xfs_rwunlock(bdp, locktype);
- if (need_isem)
- mutex_unlock(&inode->i_mutex);
- error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
- DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
- 0, 0, 0); /* Delay flag intentionally unused */
- if (error)
- goto out_nounlocks;
- if (need_isem)
- mutex_lock(&inode->i_mutex);
- xfs_rwlock(bdp, locktype);
- pos = xip->i_d.di_size;
- ret = 0;
- goto retry;
- }
-
- isize = i_size_read(inode);
- if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
- *offset = isize;
-
- if (*offset > xip->i_d.di_size) {
- xfs_ilock(xip, XFS_ILOCK_EXCL);
- if (*offset > xip->i_d.di_size) {
- xip->i_d.di_size = *offset;
- i_size_write(inode, *offset);
- xip->i_update_core = 1;
- xip->i_update_size = 1;
- }
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
- }
-
- error = -ret;
- if (ret <= 0)
- goto out_unlock_internal;
-
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- /* Handle various SYNC-type writes */
- if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
- /*
- * If we're treating this as O_DSYNC and we have not updated the
- * size, force the log.
- */
- if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
- !(xip->i_update_size)) {
- xfs_inode_log_item_t *iip = xip->i_itemp;
-
- /*
- * If an allocation transaction occurred
- * without extending the size, then we have to force
- * the log up the proper point to ensure that the
- * allocation is permanent. We can't count on
- * the fact that buffered writes lock out direct I/O
- * writes - the direct I/O write could have extended
- * the size nontransactionally, then finished before
- * we started. xfs_write_file will think that the file
- * didn't grow but the update isn't safe unless the
- * size change is logged.
- *
- * Force the log if we've committed a transaction
- * against the inode or if someone else has and
- * the commit record hasn't gone to disk (e.g.
- * the inode is pinned). This guarantees that
- * all changes affecting the inode are permanent
- * when we return.
- */
- if (iip && iip->ili_last_lsn) {
- xfs_log_force(mp, iip->ili_last_lsn,
- XFS_LOG_FORCE | XFS_LOG_SYNC);
- } else if (xfs_ipincount(xip) > 0) {
- xfs_log_force(mp, (xfs_lsn_t)0,
- XFS_LOG_FORCE | XFS_LOG_SYNC);
- }
-
- } else {
- xfs_trans_t *tp;
-
- /*
- * O_SYNC or O_DSYNC _with_ a size update are handled
- * the same way.
- *
- * If the write was synchronous then we need to make
- * sure that the inode modification time is permanent.
- * We'll have updated the timestamp above, so here
- * we use a synchronous transaction to log the inode.
- * It's not fast, but it's necessary.
- *
- * If this a dsync write and the size got changed
- * non-transactionally, then we need to ensure that
- * the size change gets logged in a synchronous
- * transaction.
- */
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
- if ((error = xfs_trans_reserve(tp, 0,
- XFS_SWRITE_LOG_RES(mp),
- 0, 0, 0))) {
- /* Transaction reserve failed */
- xfs_trans_cancel(tp, 0);
- } else {
- /* Transaction reserve successful */
- xfs_ilock(xip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, xip);
- xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0, NULL);
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
- }
- if (error)
- goto out_unlock_internal;
- }
-
- xfs_rwunlock(bdp, locktype);
- if (need_isem)
- mutex_unlock(&inode->i_mutex);
-
- error = sync_page_range(inode, mapping, pos, ret);
- if (!error)
- error = ret;
- return error;
- }
-
- out_unlock_internal:
- xfs_rwunlock(bdp, locktype);
- out_unlock_isem:
- if (need_isem)
- mutex_unlock(&inode->i_mutex);
- out_nounlocks:
- return -error;
-}
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
-xfs_bdstrat_cb(struct xfs_buf *bp)
-{
- xfs_mount_t *mp;
-
- mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_buf_iorequest(bp);
- return 0;
- } else {
- xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
- /*
- * Metadata write that didn't get logged but
- * written delayed anyway. These aren't associated
- * with a transaction, and can be ignored.
- */
- if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
- (XFS_BUF_ISREAD(bp)) == 0)
- return (xfs_bioerror_relse(bp));
- else
- return (xfs_bioerror(bp));
- }
-}
-
-
-int
-xfs_bmap(bhv_desc_t *bdp,
- xfs_off_t offset,
- ssize_t count,
- int flags,
- xfs_iomap_t *iomapp,
- int *niomaps)
-{
- xfs_inode_t *ip = XFS_BHVTOI(bdp);
- xfs_iocore_t *io = &ip->i_iocore;
-
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
- ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
- ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
-
- return xfs_iomap(io, offset, count, flags, iomapp, niomaps);
-}
-
-/*
- * Wrapper around bdstrat so that we can stop data
- * from going to disk in case we are shutting down the filesystem.
- * Typically user data goes thru this path; one of the exceptions
- * is the superblock.
- */
-int
-xfsbdstrat(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- ASSERT(mp);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- /* Grio redirection would go here
- * if (XFS_BUF_IS_GRIO(bp)) {
- */
-
- xfs_buf_iorequest(bp);
- return 0;
- }
-
- xfs_buftrace("XFSBDSTRAT IOERROR", bp);
- return (xfs_bioerror_relse(bp));
-}
-
-/*
- * If the underlying (data/log/rt) device is readonly, there are some
- * operations that cannot proceed.
- */
-int
-xfs_dev_is_read_only(
- xfs_mount_t *mp,
- char *message)
-{
- if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
- xfs_readonly_buftarg(mp->m_logdev_targp) ||
- (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
- cmn_err(CE_NOTE,
- "XFS: %s required on read-only device.", message);
- cmn_err(CE_NOTE,
- "XFS: write access unavailable, cannot proceed.");
- return EROFS;
- }
- return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
deleted file mode 100644
index 38864a88d42..00000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_LRW_H__
-#define __XFS_LRW_H__
-
-struct vnode;
-struct bhv_desc;
-struct xfs_mount;
-struct xfs_iocore;
-struct xfs_inode;
-struct xfs_bmbt_irec;
-struct xfs_buf;
-struct xfs_iomap;
-
-#if defined(XFS_RW_TRACE)
-/*
- * Defines for the trace mechanisms in xfs_lrw.c.
- */
-#define XFS_RW_KTRACE_SIZE 128
-
-#define XFS_READ_ENTER 1
-#define XFS_WRITE_ENTER 2
-#define XFS_IOMAP_READ_ENTER 3
-#define XFS_IOMAP_WRITE_ENTER 4
-#define XFS_IOMAP_READ_MAP 5
-#define XFS_IOMAP_WRITE_MAP 6
-#define XFS_IOMAP_WRITE_NOSPACE 7
-#define XFS_ITRUNC_START 8
-#define XFS_ITRUNC_FINISH1 9
-#define XFS_ITRUNC_FINISH2 10
-#define XFS_CTRUNC1 11
-#define XFS_CTRUNC2 12
-#define XFS_CTRUNC3 13
-#define XFS_CTRUNC4 14
-#define XFS_CTRUNC5 15
-#define XFS_CTRUNC6 16
-#define XFS_BUNMAPI 17
-#define XFS_INVAL_CACHED 18
-#define XFS_DIORD_ENTER 19
-#define XFS_DIOWR_ENTER 20
-#define XFS_SENDFILE_ENTER 21
-#define XFS_WRITEPAGE_ENTER 22
-#define XFS_RELEASEPAGE_ENTER 23
-#define XFS_INVALIDPAGE_ENTER 24
-#define XFS_IOMAP_ALLOC_ENTER 25
-#define XFS_IOMAP_ALLOC_MAP 26
-#define XFS_IOMAP_UNWRITTEN 27
-extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
- void *, size_t, loff_t, int);
-extern void xfs_inval_cached_trace(struct xfs_iocore *,
- xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
-#else
-#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags)
-#define xfs_inval_cached_trace(io, offset, len, first, last)
-#endif
-
-/*
- * Maximum count of bmaps used by read and write paths.
- */
-#define XFS_MAX_RW_NBMAPS 4
-
-extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int,
- struct xfs_iomap *, int *);
-extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
-
-extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
- xfs_fsize_t, xfs_fsize_t);
-extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
- const struct iovec *, unsigned int,
- loff_t *, int, struct cred *);
-extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
- const struct iovec *, unsigned int,
- loff_t *, int, struct cred *);
-extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
- loff_t *, int, size_t, read_actor_t,
- void *, struct cred *);
-
-extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
-
-#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
deleted file mode 100644
index 8955720a2c6..00000000000
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include <linux/proc_fs.h>
-
-DEFINE_PER_CPU(struct xfsstats, xfsstats);
-
-STATIC int
-xfs_read_xfsstats(
- char *buffer,
- char **start,
- off_t offset,
- int count,
- int *eof,
- void *data)
-{
- int c, i, j, len, val;
- __uint64_t xs_xstrat_bytes = 0;
- __uint64_t xs_write_bytes = 0;
- __uint64_t xs_read_bytes = 0;
-
- static const struct xstats_entry {
- char *desc;
- int endpoint;
- } xstats[] = {
- { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC },
- { "abt", XFSSTAT_END_ALLOC_BTREE },
- { "blk_map", XFSSTAT_END_BLOCK_MAPPING },
- { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE },
- { "dir", XFSSTAT_END_DIRECTORY_OPS },
- { "trans", XFSSTAT_END_TRANSACTIONS },
- { "ig", XFSSTAT_END_INODE_OPS },
- { "log", XFSSTAT_END_LOG_OPS },
- { "push_ail", XFSSTAT_END_TAIL_PUSHING },
- { "xstrat", XFSSTAT_END_WRITE_CONVERT },
- { "rw", XFSSTAT_END_READ_WRITE_OPS },
- { "attr", XFSSTAT_END_ATTRIBUTE_OPS },
- { "icluster", XFSSTAT_END_INODE_CLUSTER },
- { "vnodes", XFSSTAT_END_VNODE_OPS },
- { "buf", XFSSTAT_END_BUF },
- };
-
- /* Loop over all stats groups */
- for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) {
- len += sprintf(buffer + len, xstats[i].desc);
- /* inner loop does each group */
- while (j < xstats[i].endpoint) {
- val = 0;
- /* sum over all cpus */
- for (c = 0; c < NR_CPUS; c++) {
- if (!cpu_possible(c)) continue;
- val += *(((__u32*)&per_cpu(xfsstats, c) + j));
- }
- len += sprintf(buffer + len, " %u", val);
- j++;
- }
- buffer[len++] = '\n';
- }
- /* extra precision counters */
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i)) continue;
- xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
- xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
- xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
- }
-
- len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n",
- xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
- len += sprintf(buffer + len, "debug %u\n",
-#if defined(DEBUG)
- 1);
-#else
- 0);
-#endif
-
- if (offset >= len) {
- *start = buffer;
- *eof = 1;
- return 0;
- }
- *start = buffer + offset;
- if ((len -= offset) > count)
- return count;
- *eof = 1;
-
- return len;
-}
-
-void
-xfs_init_procfs(void)
-{
- if (!proc_mkdir("fs/xfs", NULL))
- return;
- create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
-}
-
-void
-xfs_cleanup_procfs(void)
-{
- remove_proc_entry("fs/xfs/stat", NULL);
- remove_proc_entry("fs/xfs", NULL);
-}
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
deleted file mode 100644
index f22e426d9e4..00000000000
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ /dev/null
@@ -1,1014 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_clnt.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_version.h"
-
-#include <linux/namei.h>
-#include <linux/init.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-
-STATIC struct quotactl_ops linvfs_qops;
-STATIC struct super_operations linvfs_sops;
-STATIC kmem_zone_t *xfs_vnode_zone;
-STATIC kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
-
-STATIC struct xfs_mount_args *
-xfs_args_allocate(
- struct super_block *sb)
-{
- struct xfs_mount_args *args;
-
- args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
- args->logbufs = args->logbufsize = -1;
- strncpy(args->fsname, sb->s_id, MAXNAMELEN);
-
- /* Copy the already-parsed mount(2) flags we're interested in */
- if (sb->s_flags & MS_NOATIME)
- args->flags |= XFSMNT_NOATIME;
- if (sb->s_flags & MS_DIRSYNC)
- args->flags |= XFSMNT_DIRSYNC;
- if (sb->s_flags & MS_SYNCHRONOUS)
- args->flags |= XFSMNT_WSYNC;
-
- /* Default to 32 bit inodes on Linux all the time */
- args->flags |= XFSMNT_32BITINODES;
-
- return args;
-}
-
-__uint64_t
-xfs_max_file_offset(
- unsigned int blockshift)
-{
- unsigned int pagefactor = 1;
- unsigned int bitshift = BITS_PER_LONG - 1;
-
- /* Figure out maximum filesize, on Linux this can depend on
- * the filesystem blocksize (on 32 bit platforms).
- * __block_prepare_write does this in an [unsigned] long...
- * page->index << (PAGE_CACHE_SHIFT - bbits)
- * So, for page sized blocks (4K on 32 bit platforms),
- * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
- * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
- * but for smaller blocksizes it is less (bbits = log2 bsize).
- * Note1: get_block_t takes a long (implicit cast from above)
- * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
- * can optionally convert the [unsigned] long from above into
- * an [unsigned] long long.
- */
-
-#if BITS_PER_LONG == 32
-# if defined(CONFIG_LBD)
- ASSERT(sizeof(sector_t) == 8);
- pagefactor = PAGE_CACHE_SIZE;
- bitshift = BITS_PER_LONG;
-# else
- pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
-# endif
-#endif
-
- return (((__uint64_t)pagefactor) << bitshift) - 1;
-}
-
-STATIC __inline__ void
-xfs_set_inodeops(
- struct inode *inode)
-{
- switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
- inode->i_op = &linvfs_file_inode_operations;
- inode->i_fop = &linvfs_file_operations;
- inode->i_mapping->a_ops = &linvfs_aops;
- break;
- case S_IFDIR:
- inode->i_op = &linvfs_dir_inode_operations;
- inode->i_fop = &linvfs_dir_operations;
- break;
- case S_IFLNK:
- inode->i_op = &linvfs_symlink_inode_operations;
- if (inode->i_blocks)
- inode->i_mapping->a_ops = &linvfs_aops;
- break;
- default:
- inode->i_op = &linvfs_file_inode_operations;
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
- break;
- }
-}
-
-STATIC __inline__ void
-xfs_revalidate_inode(
- xfs_mount_t *mp,
- vnode_t *vp,
- xfs_inode_t *ip)
-{
- struct inode *inode = LINVFS_GET_IP(vp);
-
- inode->i_mode = ip->i_d.di_mode;
- inode->i_nlink = ip->i_d.di_nlink;
- inode->i_uid = ip->i_d.di_uid;
- inode->i_gid = ip->i_d.di_gid;
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- inode->i_rdev =
- MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
- sysv_minor(ip->i_df.if_u2.if_rdev));
- break;
- default:
- inode->i_rdev = 0;
- break;
- }
-
- inode->i_blksize = xfs_preferred_iosize(mp);
- inode->i_generation = ip->i_d.di_gen;
- i_size_write(inode, ip->i_d.di_size);
- inode->i_blocks =
- XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
- inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
- inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
- inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
- inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
- inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
- inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
- if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
- vp->v_flag &= ~VMODIFIED;
-}
-
-void
-xfs_initialize_vnode(
- bhv_desc_t *bdp,
- vnode_t *vp,
- bhv_desc_t *inode_bhv,
- int unlock)
-{
- xfs_inode_t *ip = XFS_BHVTOI(inode_bhv);
- struct inode *inode = LINVFS_GET_IP(vp);
-
- if (!inode_bhv->bd_vobj) {
- vp->v_vfsp = bhvtovfs(bdp);
- bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
- bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
- }
-
- /*
- * We need to set the ops vectors, and unlock the inode, but if
- * we have been called during the new inode create process, it is
- * too early to fill in the Linux inode. We will get called a
- * second time once the inode is properly set up, and then we can
- * finish our work.
- */
- if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
- xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
- xfs_set_inodeops(inode);
-
- ip->i_flags &= ~XFS_INEW;
- barrier();
-
- unlock_new_inode(inode);
- }
-}
-
-int
-xfs_blkdev_get(
- xfs_mount_t *mp,
- const char *name,
- struct block_device **bdevp)
-{
- int error = 0;
-
- *bdevp = open_bdev_excl(name, 0, mp);
- if (IS_ERR(*bdevp)) {
- error = PTR_ERR(*bdevp);
- printk("XFS: Invalid device [%s], error=%d\n", name, error);
- }
-
- return -error;
-}
-
-void
-xfs_blkdev_put(
- struct block_device *bdev)
-{
- if (bdev)
- close_bdev_excl(bdev);
-}
-
-/*
- * Try to write out the superblock using barriers.
- */
-STATIC int
-xfs_barrier_test(
- xfs_mount_t *mp)
-{
- xfs_buf_t *sbp = xfs_getsb(mp, 0);
- int error;
-
- XFS_BUF_UNDONE(sbp);
- XFS_BUF_UNREAD(sbp);
- XFS_BUF_UNDELAYWRITE(sbp);
- XFS_BUF_WRITE(sbp);
- XFS_BUF_UNASYNC(sbp);
- XFS_BUF_ORDERED(sbp);
-
- xfsbdstrat(mp, sbp);
- error = xfs_iowait(sbp);
-
- /*
- * Clear all the flags we set and possible error state in the
- * buffer. We only did the write to try out whether barriers
- * worked and shouldn't leave any traces in the superblock
- * buffer.
- */
- XFS_BUF_DONE(sbp);
- XFS_BUF_ERROR(sbp, 0);
- XFS_BUF_UNORDERED(sbp);
-
- xfs_buf_relse(sbp);
- return error;
-}
-
-void
-xfs_mountfs_check_barriers(xfs_mount_t *mp)
-{
- int error;
-
- if (mp->m_logdev_targp != mp->m_ddev_targp) {
- xfs_fs_cmn_err(CE_NOTE, mp,
- "Disabling barriers, not supported with external log device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
- QUEUE_ORDERED_NONE) {
- xfs_fs_cmn_err(CE_NOTE, mp,
- "Disabling barriers, not supported by the underlying device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- error = xfs_barrier_test(mp);
- if (error) {
- xfs_fs_cmn_err(CE_NOTE, mp,
- "Disabling barriers, trial barrier write failed");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-}
-
-void
-xfs_blkdev_issue_flush(
- xfs_buftarg_t *buftarg)
-{
- blkdev_issue_flush(buftarg->bt_bdev, NULL);
-}
-
-STATIC struct inode *
-linvfs_alloc_inode(
- struct super_block *sb)
-{
- vnode_t *vp;
-
- vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
- if (!vp)
- return NULL;
- return LINVFS_GET_IP(vp);
-}
-
-STATIC void
-linvfs_destroy_inode(
- struct inode *inode)
-{
- kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
-}
-
-STATIC void
-linvfs_inode_init_once(
- void *data,
- kmem_cache_t *cachep,
- unsigned long flags)
-{
- vnode_t *vp = (vnode_t *)data;
-
- if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
- SLAB_CTOR_CONSTRUCTOR)
- inode_init_once(LINVFS_GET_IP(vp));
-}
-
-STATIC int
-linvfs_init_zones(void)
-{
- xfs_vnode_zone = kmem_cache_create("xfs_vnode",
- sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
- linvfs_inode_init_once, NULL);
- if (!xfs_vnode_zone)
- goto out;
-
- xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
- if (!xfs_ioend_zone)
- goto out_destroy_vnode_zone;
-
- xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
- mempool_alloc_slab, mempool_free_slab,
- xfs_ioend_zone);
- if (!xfs_ioend_pool)
- goto out_free_ioend_zone;
-
- return 0;
-
-
- out_free_ioend_zone:
- kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
- kmem_zone_destroy(xfs_vnode_zone);
- out:
- return -ENOMEM;
-}
-
-STATIC void
-linvfs_destroy_zones(void)
-{
- mempool_destroy(xfs_ioend_pool);
- kmem_zone_destroy(xfs_vnode_zone);
- kmem_zone_destroy(xfs_ioend_zone);
-}
-
-/*
- * Attempt to flush the inode, this will actually fail
- * if the inode is pinned, but we dirty the inode again
- * at the point when it is unpinned after a log write,
- * since this is when the inode itself becomes flushable.
- */
-STATIC int
-linvfs_write_inode(
- struct inode *inode,
- int sync)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error = 0, flags = FLUSH_INODE;
-
- if (vp) {
- vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
- if (sync)
- flags |= FLUSH_SYNC;
- VOP_IFLUSH(vp, flags, error);
- if (error == EAGAIN) {
- if (sync)
- VOP_IFLUSH(vp, flags | FLUSH_LOG, error);
- else
- error = 0;
- }
- }
-
- return -error;
-}
-
-STATIC void
-linvfs_clear_inode(
- struct inode *inode)
-{
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error, cache;
-
- vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);
-
- XFS_STATS_INC(vn_rele);
- XFS_STATS_INC(vn_remove);
- XFS_STATS_INC(vn_reclaim);
- XFS_STATS_DEC(vn_active);
-
- /*
- * This can happen because xfs_iget_core calls xfs_idestroy if we
- * find an inode with di_mode == 0 but without IGET_CREATE set.
- */
- if (vp->v_fbhv)
- VOP_INACTIVE(vp, NULL, cache);
-
- VN_LOCK(vp);
- vp->v_flag &= ~VMODIFIED;
- VN_UNLOCK(vp, 0);
-
- if (vp->v_fbhv) {
- VOP_RECLAIM(vp, error);
- if (error)
- panic("vn_purge: cannot reclaim");
- }
-
- ASSERT(vp->v_fbhv == NULL);
-
-#ifdef XFS_VNODE_TRACE
- ktrace_free(vp->v_trace);
-#endif
-}
-
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
- struct vfs *vfs,
- void *data,
- void (*syncer)(vfs_t *, void *))
-{
- vfs_sync_work_t *work;
-
- work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);
- INIT_LIST_HEAD(&work->w_list);
- work->w_syncer = syncer;
- work->w_data = data;
- work->w_vfs = vfs;
- spin_lock(&vfs->vfs_sync_lock);
- list_add_tail(&work->w_list, &vfs->vfs_sync_list);
- spin_unlock(&vfs->vfs_sync_lock);
- wake_up_process(vfs->vfs_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inode_work(
- vfs_t *vfs,
- void *inode)
-{
- filemap_flush(((struct inode *)inode)->i_mapping);
- iput((struct inode *)inode);
-}
-
-void
-xfs_flush_inode(
- xfs_inode_t *ip)
-{
- struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
- struct vfs *vfs = XFS_MTOVFS(ip->i_mount);
-
- igrab(inode);
- xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
- delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
- vfs_t *vfs,
- void *inode)
-{
- sync_blockdev(vfs->vfs_super->s_bdev);
- iput((struct inode *)inode);
-}
-
-void
-xfs_flush_device(
- xfs_inode_t *ip)
-{
- struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
- struct vfs *vfs = XFS_MTOVFS(ip->i_mount);
-
- igrab(inode);
- xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
- delay(msecs_to_jiffies(500));
- xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-
-#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
-STATIC void
-vfs_sync_worker(
- vfs_t *vfsp,
- void *unused)
-{
- int error;
-
- if (!(vfsp->vfs_flag & VFS_RDONLY))
- VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
- vfsp->vfs_sync_seq++;
- wmb();
- wake_up(&vfsp->vfs_wait_single_sync_task);
-}
-
-STATIC int
-xfssyncd(
- void *arg)
-{
- long timeleft;
- vfs_t *vfsp = (vfs_t *) arg;
- struct vfs_sync_work *work, *n;
- LIST_HEAD (tmp);
-
- timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
- for (;;) {
- timeleft = schedule_timeout_interruptible(timeleft);
- /* swsusp */
- try_to_freeze();
- if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
- break;
-
- spin_lock(&vfsp->vfs_sync_lock);
- /*
- * We can get woken by laptop mode, to do a sync -
- * that's the (only!) case where the list would be
- * empty with time remaining.
- */
- if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
- if (!timeleft)
- timeleft = xfs_syncd_centisecs *
- msecs_to_jiffies(10);
- INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
- list_add_tail(&vfsp->vfs_sync_work.w_list,
- &vfsp->vfs_sync_list);
- }
- list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
- list_move(&work->w_list, &tmp);
- spin_unlock(&vfsp->vfs_sync_lock);
-
- list_for_each_entry_safe(work, n, &tmp, w_list) {
- (*work->w_syncer)(vfsp, work->w_data);
- list_del(&work->w_list);
- if (work == &vfsp->vfs_sync_work)
- continue;
- kmem_free(work, sizeof(struct vfs_sync_work));
- }
- }
-
- return 0;
-}
-
-STATIC int
-linvfs_start_syncd(
- vfs_t *vfsp)
-{
- vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
- vfsp->vfs_sync_work.w_vfs = vfsp;
- vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
- if (IS_ERR(vfsp->vfs_sync_task))
- return -PTR_ERR(vfsp->vfs_sync_task);
- return 0;
-}
-
-STATIC void
-linvfs_stop_syncd(
- vfs_t *vfsp)
-{
- kthread_stop(vfsp->vfs_sync_task);
-}
-
-STATIC void
-linvfs_put_super(
- struct super_block *sb)
-{
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- linvfs_stop_syncd(vfsp);
- VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
- if (!error)
- VFS_UNMOUNT(vfsp, 0, NULL, error);
- if (error) {
- printk("XFS unmount got error %d\n", error);
- printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp);
- return;
- }
-
- vfs_deallocate(vfsp);
-}
-
-STATIC void
-linvfs_write_super(
- struct super_block *sb)
-{
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- if (sb->s_flags & MS_RDONLY) {
- sb->s_dirt = 0; /* paranoia */
- return;
- }
- /* Push the log and superblock a little */
- VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
- sb->s_dirt = 0;
-}
-
-STATIC int
-linvfs_sync_super(
- struct super_block *sb,
- int wait)
-{
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- int error;
- int flags = SYNC_FSDATA;
-
- if (unlikely(sb->s_frozen == SB_FREEZE_WRITE))
- flags = SYNC_QUIESCE;
- else
- flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
-
- VFS_SYNC(vfsp, flags, NULL, error);
- sb->s_dirt = 0;
-
- if (unlikely(laptop_mode)) {
- int prev_sync_seq = vfsp->vfs_sync_seq;
-
- /*
- * The disk must be active because we're syncing.
- * We schedule xfssyncd now (now that the disk is
- * active) instead of later (when it might not be).
- */
- wake_up_process(vfsp->vfs_sync_task);
- /*
- * We have to wait for the sync iteration to complete.
- * If we don't, the disk activity caused by the sync
- * will come after the sync is completed, and that
- * triggers another sync from laptop mode.
- */
- wait_event(vfsp->vfs_wait_single_sync_task,
- vfsp->vfs_sync_seq != prev_sync_seq);
- }
-
- return -error;
-}
-
-STATIC int
-linvfs_statfs(
- struct super_block *sb,
- struct kstatfs *statp)
-{
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- VFS_STATVFS(vfsp, statp, NULL, error);
- return -error;
-}
-
-STATIC int
-linvfs_remount(
- struct super_block *sb,
- int *flags,
- char *options)
-{
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- struct xfs_mount_args *args = xfs_args_allocate(sb);
- int error;
-
- VFS_PARSEARGS(vfsp, options, args, 1, error);
- if (!error)
- VFS_MNTUPDATE(vfsp, flags, args, error);
- kmem_free(args, sizeof(*args));
- return -error;
-}
-
-STATIC void
-linvfs_freeze_fs(
- struct super_block *sb)
-{
- VFS_FREEZE(LINVFS_GET_VFS(sb));
-}
-
-STATIC int
-linvfs_show_options(
- struct seq_file *m,
- struct vfsmount *mnt)
-{
- struct vfs *vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
- int error;
-
- VFS_SHOWARGS(vfsp, m, error);
- return error;
-}
-
-STATIC int
-linvfs_quotasync(
- struct super_block *sb,
- int type)
-{
- struct vfs *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, (caddr_t)NULL, error);
- return -error;
-}
-
-STATIC int
-linvfs_getxstate(
- struct super_block *sb,
- struct fs_quota_stat *fqs)
-{
- struct vfs *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
- return -error;
-}
-
-STATIC int
-linvfs_setxstate(
- struct super_block *sb,
- unsigned int flags,
- int op)
-{
- struct vfs *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
- return -error;
-}
-
-STATIC int
-linvfs_getxquota(
- struct super_block *sb,
- int type,
- qid_t id,
- struct fs_disk_quota *fdq)
-{
- struct vfs *vfsp = LINVFS_GET_VFS(sb);
- int error, getmode;
-
- getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
- ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA);
- VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
- return -error;
-}
-
-STATIC int
-linvfs_setxquota(
- struct super_block *sb,
- int type,
- qid_t id,
- struct fs_disk_quota *fdq)
-{
- struct vfs *vfsp = LINVFS_GET_VFS(sb);
- int error, setmode;
-
- setmode = (type == USRQUOTA) ? Q_XSETQLIM :
- ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM);
- VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
- return -error;
-}
-
-STATIC int
-linvfs_fill_super(
- struct super_block *sb,
- void *data,
- int silent)
-{
- vnode_t *rootvp;
- struct vfs *vfsp = vfs_allocate();
- struct xfs_mount_args *args = xfs_args_allocate(sb);
- struct kstatfs statvfs;
- int error, error2;
-
- vfsp->vfs_super = sb;
- LINVFS_SET_VFS(sb, vfsp);
- if (sb->s_flags & MS_RDONLY)
- vfsp->vfs_flag |= VFS_RDONLY;
- bhv_insert_all_vfsops(vfsp);
-
- VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
- if (error) {
- bhv_remove_all_vfsops(vfsp, 1);
- goto fail_vfsop;
- }
-
- sb_min_blocksize(sb, BBSIZE);
-#ifdef CONFIG_XFS_EXPORT
- sb->s_export_op = &linvfs_export_ops;
-#endif
- sb->s_qcop = &linvfs_qops;
- sb->s_op = &linvfs_sops;
-
- VFS_MOUNT(vfsp, args, NULL, error);
- if (error) {
- bhv_remove_all_vfsops(vfsp, 1);
- goto fail_vfsop;
- }
-
- VFS_STATVFS(vfsp, &statvfs, NULL, error);
- if (error)
- goto fail_unmount;
-
- sb->s_dirt = 1;
- sb->s_magic = statvfs.f_type;
- sb->s_blocksize = statvfs.f_bsize;
- sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
- sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
- sb->s_time_gran = 1;
- set_posix_acl_flag(sb);
-
- VFS_ROOT(vfsp, &rootvp, error);
- if (error)
- goto fail_unmount;
-
- sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp));
- if (!sb->s_root) {
- error = ENOMEM;
- goto fail_vnrele;
- }
- if (is_bad_inode(sb->s_root->d_inode)) {
- error = EINVAL;
- goto fail_vnrele;
- }
- if ((error = linvfs_start_syncd(vfsp)))
- goto fail_vnrele;
- vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);
-
- kmem_free(args, sizeof(*args));
- return 0;
-
-fail_vnrele:
- if (sb->s_root) {
- dput(sb->s_root);
- sb->s_root = NULL;
- } else {
- VN_RELE(rootvp);
- }
-
-fail_unmount:
- VFS_UNMOUNT(vfsp, 0, NULL, error2);
-
-fail_vfsop:
- vfs_deallocate(vfsp);
- kmem_free(args, sizeof(*args));
- return -error;
-}
-
-STATIC struct super_block *
-linvfs_get_sb(
- struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
-{
- return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
-}
-
-STATIC struct super_operations linvfs_sops = {
- .alloc_inode = linvfs_alloc_inode,
- .destroy_inode = linvfs_destroy_inode,
- .write_inode = linvfs_write_inode,
- .clear_inode = linvfs_clear_inode,
- .put_super = linvfs_put_super,
- .write_super = linvfs_write_super,
- .sync_fs = linvfs_sync_super,
- .write_super_lockfs = linvfs_freeze_fs,
- .statfs = linvfs_statfs,
- .remount_fs = linvfs_remount,
- .show_options = linvfs_show_options,
-};
-
-STATIC struct quotactl_ops linvfs_qops = {
- .quota_sync = linvfs_quotasync,
- .get_xstate = linvfs_getxstate,
- .set_xstate = linvfs_setxstate,
- .get_xquota = linvfs_getxquota,
- .set_xquota = linvfs_setxquota,
-};
-
-STATIC struct file_system_type xfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "xfs",
- .get_sb = linvfs_get_sb,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-
-
-STATIC int __init
-init_xfs_fs( void )
-{
- int error;
- struct sysinfo si;
- static char message[] __initdata = KERN_INFO \
- XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
-
- printk(message);
-
- si_meminfo(&si);
- xfs_physmem = si.totalram;
-
- ktrace_init(64);
-
- error = linvfs_init_zones();
- if (error < 0)
- goto undo_zones;
-
- error = xfs_buf_init();
- if (error < 0)
- goto undo_buffers;
-
- vn_init();
- xfs_init();
- uuid_init();
- vfs_initquota();
-
- error = register_filesystem(&xfs_fs_type);
- if (error)
- goto undo_register;
- XFS_DM_INIT(&xfs_fs_type);
- return 0;
-
-undo_register:
- xfs_buf_terminate();
-
-undo_buffers:
- linvfs_destroy_zones();
-
-undo_zones:
- return error;
-}
-
-STATIC void __exit
-exit_xfs_fs( void )
-{
- vfs_exitquota();
- XFS_DM_EXIT(&xfs_fs_type);
- unregister_filesystem(&xfs_fs_type);
- xfs_cleanup();
- xfs_buf_terminate();
- linvfs_destroy_zones();
- ktrace_uninit();
-}
-
-module_init(init_xfs_fs);
-module_exit(exit_xfs_fs);
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
-MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
deleted file mode 100644
index a0256497242..00000000000
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-
-static struct ctl_table_header *xfs_table_header;
-
-#ifdef CONFIG_PROC_FS
-STATIC int
-xfs_stats_clear_proc_handler(
- ctl_table *ctl,
- int write,
- struct file *filp,
- void __user *buffer,
- size_t *lenp,
- loff_t *ppos)
-{
- int c, ret, *valp = ctl->data;
- __uint32_t vn_active;
-
- ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);
-
- if (!ret && write && *valp) {
- printk("XFS Clearing xfsstats\n");
- for (c = 0; c < NR_CPUS; c++) {
- if (!cpu_possible(c)) continue;
- preempt_disable();
- /* save vn_active, it's a universal truth! */
- vn_active = per_cpu(xfsstats, c).vn_active;
- memset(&per_cpu(xfsstats, c), 0,
- sizeof(struct xfsstats));
- per_cpu(xfsstats, c).vn_active = vn_active;
- preempt_enable();
- }
- xfs_stats_clear = 0;
- }
-
- return ret;
-}
-#endif /* CONFIG_PROC_FS */
-
-STATIC ctl_table xfs_table[] = {
- {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},
-
- {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},
-
- {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},
-
- {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.panic_mask.min, &xfs_params.panic_mask.max},
-
- {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.error_level.min, &xfs_params.error_level.max},
-
- {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},
-
- {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},
-
- {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},
-
- {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},
-
- {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},
-
- {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},
-
- {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},
-
- {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
-
- /* please keep this the last entry */
-#ifdef CONFIG_PROC_FS
- {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
- sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
- &sysctl_intvec, NULL,
- &xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
-#endif /* CONFIG_PROC_FS */
-
- {0}
-};
-
-STATIC ctl_table xfs_dir_table[] = {
- {FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
- {0}
-};
-
-STATIC ctl_table xfs_root_table[] = {
- {CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table},
- {0}
-};
-
-void
-xfs_sysctl_register(void)
-{
- xfs_table_header = register_sysctl_table(xfs_root_table, 1);
-}
-
-void
-xfs_sysctl_unregister(void)
-{
- if (xfs_table_header)
- unregister_sysctl_table(xfs_table_header);
-}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
deleted file mode 100644
index c855d62e534..00000000000
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_clnt.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_imap.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-
-int
-vfs_mount(
- struct bhv_desc *bdp,
- struct xfs_mount_args *args,
- struct cred *cr)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_mount)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr));
-}
-
-int
-vfs_parseargs(
- struct bhv_desc *bdp,
- char *s,
- struct xfs_mount_args *args,
- int f)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_parseargs)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f));
-}
-
-int
-vfs_showargs(
- struct bhv_desc *bdp,
- struct seq_file *m)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_showargs)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_showargs)(next, m));
-}
-
-int
-vfs_unmount(
- struct bhv_desc *bdp,
- int fl,
- struct cred *cr)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_unmount)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr));
-}
-
-int
-vfs_mntupdate(
- struct bhv_desc *bdp,
- int *fl,
- struct xfs_mount_args *args)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_mntupdate)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args));
-}
-
-int
-vfs_root(
- struct bhv_desc *bdp,
- struct vnode **vpp)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_root)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_root)(next, vpp));
-}
-
-int
-vfs_statvfs(
- struct bhv_desc *bdp,
- xfs_statfs_t *sp,
- struct vnode *vp)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_statvfs)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp));
-}
-
-int
-vfs_sync(
- struct bhv_desc *bdp,
- int fl,
- struct cred *cr)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_sync)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr));
-}
-
-int
-vfs_vget(
- struct bhv_desc *bdp,
- struct vnode **vpp,
- struct fid *fidp)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_vget)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp));
-}
-
-int
-vfs_dmapiops(
- struct bhv_desc *bdp,
- caddr_t addr)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_dmapiops)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr));
-}
-
-int
-vfs_quotactl(
- struct bhv_desc *bdp,
- int cmd,
- int id,
- caddr_t addr)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_quotactl)
- next = BHV_NEXT(next);
- return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr));
-}
-
-void
-vfs_init_vnode(
- struct bhv_desc *bdp,
- struct vnode *vp,
- struct bhv_desc *bp,
- int unlock)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_init_vnode)
- next = BHV_NEXT(next);
- ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock));
-}
-
-void
-vfs_force_shutdown(
- struct bhv_desc *bdp,
- int fl,
- char *file,
- int line)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_force_shutdown)
- next = BHV_NEXT(next);
- ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
-}
-
-void
-vfs_freeze(
- struct bhv_desc *bdp)
-{
- struct bhv_desc *next = bdp;
-
- ASSERT(next);
- while (! (bhvtovfsops(next))->vfs_freeze)
- next = BHV_NEXT(next);
- ((*bhvtovfsops(next)->vfs_freeze)(next));
-}
-
-vfs_t *
-vfs_allocate( void )
-{
- struct vfs *vfsp;
-
- vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP);
- bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
- INIT_LIST_HEAD(&vfsp->vfs_sync_list);
- spin_lock_init(&vfsp->vfs_sync_lock);
- init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
- return vfsp;
-}
-
-void
-vfs_deallocate(
- struct vfs *vfsp)
-{
- bhv_head_destroy(VFS_BHVHEAD(vfsp));
- kmem_free(vfsp, sizeof(vfs_t));
-}
-
-void
-vfs_insertops(
- struct vfs *vfsp,
- struct bhv_vfsops *vfsops)
-{
- struct bhv_desc *bdp;
-
- bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP);
- bhv_desc_init(bdp, NULL, vfsp, vfsops);
- bhv_insert(&vfsp->vfs_bh, bdp);
-}
-
-void
-vfs_insertbhv(
- struct vfs *vfsp,
- struct bhv_desc *bdp,
- struct vfsops *vfsops,
- void *mount)
-{
- bhv_desc_init(bdp, mount, vfsp, vfsops);
- bhv_insert_initial(&vfsp->vfs_bh, bdp);
-}
-
-void
-bhv_remove_vfsops(
- struct vfs *vfsp,
- int pos)
-{
- struct bhv_desc *bhv;
-
- bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos);
- if (!bhv)
- return;
- bhv_remove(&vfsp->vfs_bh, bhv);
- kmem_free(bhv, sizeof(*bhv));
-}
-
-void
-bhv_remove_all_vfsops(
- struct vfs *vfsp,
- int freebase)
-{
- struct xfs_mount *mp;
-
- bhv_remove_vfsops(vfsp, VFS_POSITION_QM);
- bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
- if (!freebase)
- return;
- mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
- VFS_REMOVEBHV(vfsp, &mp->m_bhv);
- xfs_mount_free(mp, 0);
-}
-
-void
-bhv_insert_all_vfsops(
- struct vfs *vfsp)
-{
- struct xfs_mount *mp;
-
- mp = xfs_mount_init();
- vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
- vfs_insertdmapi(vfsp);
- vfs_insertquota(vfsp);
-}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
deleted file mode 100644
index 57caf9eddee..00000000000
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VFS_H__
-#define __XFS_VFS_H__
-
-#include <linux/vfs.h>
-#include "xfs_fs.h"
-
-struct fid;
-struct vfs;
-struct cred;
-struct vnode;
-struct kstatfs;
-struct seq_file;
-struct super_block;
-struct xfs_mount_args;
-
-typedef struct kstatfs xfs_statfs_t;
-
-typedef struct vfs_sync_work {
- struct list_head w_list;
- struct vfs *w_vfs;
- void *w_data; /* syncer routine argument */
- void (*w_syncer)(struct vfs *, void *);
-} vfs_sync_work_t;
-
-typedef struct vfs {
- u_int vfs_flag; /* flags */
- xfs_fsid_t vfs_fsid; /* file system ID */
- xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */
- bhv_head_t vfs_bh; /* head of vfs behavior chain */
- struct super_block *vfs_super; /* generic superblock pointer */
- struct task_struct *vfs_sync_task; /* generalised sync thread */
- vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */
- struct list_head vfs_sync_list; /* sync thread work item list */
- spinlock_t vfs_sync_lock; /* work item list lock */
- int vfs_sync_seq; /* sync thread generation no. */
- wait_queue_head_t vfs_wait_single_sync_task;
-} vfs_t;
-
-#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
-
-#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) )
-#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) )
-#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh )
-#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
-
-#define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */
-#define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */
-#define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */
-
-typedef enum {
- VFS_BHV_UNKNOWN, /* not specified */
- VFS_BHV_XFS, /* xfs */
- VFS_BHV_DM, /* data migration */
- VFS_BHV_QM, /* quota manager */
- VFS_BHV_IO, /* IO path */
- VFS_BHV_END /* housekeeping end-of-range */
-} vfs_bhv_t;
-
-#define VFS_POSITION_XFS (BHV_POSITION_BASE)
-#define VFS_POSITION_DM (VFS_POSITION_BASE+10)
-#define VFS_POSITION_QM (VFS_POSITION_BASE+20)
-#define VFS_POSITION_IO (VFS_POSITION_BASE+30)
-
-#define VFS_RDONLY 0x0001 /* read-only vfs */
-#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
-#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
-#define VFS_32BITINODES 0x0008 /* do not use inums above 32 bits */
-#define VFS_END 0x0008 /* max flag */
-
-#define SYNC_ATTR 0x0001 /* sync attributes */
-#define SYNC_CLOSE 0x0002 /* close file system down */
-#define SYNC_DELWRI 0x0004 /* look at delayed writes */
-#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
-#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
-#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
-#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
-#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
-#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */
-
-typedef int (*vfs_mount_t)(bhv_desc_t *,
- struct xfs_mount_args *, struct cred *);
-typedef int (*vfs_parseargs_t)(bhv_desc_t *, char *,
- struct xfs_mount_args *, int);
-typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
-typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
-typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *,
- struct xfs_mount_args *);
-typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **);
-typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
-typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
-typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *);
-typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
-typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
-typedef void (*vfs_init_vnode_t)(bhv_desc_t *,
- struct vnode *, bhv_desc_t *, int);
-typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
-typedef void (*vfs_freeze_t)(bhv_desc_t *);
-
-typedef struct vfsops {
- bhv_position_t vf_position; /* behavior chain position */
- vfs_mount_t vfs_mount; /* mount file system */
- vfs_parseargs_t vfs_parseargs; /* parse mount options */
- vfs_showargs_t vfs_showargs; /* unparse mount options */
- vfs_unmount_t vfs_unmount; /* unmount file system */
- vfs_mntupdate_t vfs_mntupdate; /* update file system options */
- vfs_root_t vfs_root; /* get root vnode */
- vfs_statvfs_t vfs_statvfs; /* file system statistics */
- vfs_sync_t vfs_sync; /* flush files */
- vfs_vget_t vfs_vget; /* get vnode from fid */
- vfs_dmapiops_t vfs_dmapiops; /* data migration */
- vfs_quotactl_t vfs_quotactl; /* disk quota */
- vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */
- vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */
- vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */
-} vfsops_t;
-
-/*
- * VFS's. Operates on vfs structure pointers (starts at bhv head).
- */
-#define VHEAD(v) ((v)->vfs_fbhv)
-#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr))
-#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f))
-#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m))
-#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr))
-#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args))
-#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp))
-#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp))
-#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr))
-#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp))
-#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p))
-#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p))
-#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) )
-#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) )
-#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) )
-
-/*
- * PVFS's. Operates on behavior descriptor pointers.
- */
-#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr))
-#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f))
-#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m))
-#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr))
-#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args))
-#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp))
-#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp))
-#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr))
-#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp))
-#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p))
-#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p))
-#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) )
-#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) )
-#define PVFS_FREEZE(b) ( vfs_freeze(b) )
-
-extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
-extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
-extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
-extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
-extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *);
-extern int vfs_root(bhv_desc_t *, struct vnode **);
-extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
-extern int vfs_sync(bhv_desc_t *, int, struct cred *);
-extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *);
-extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
-extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
-extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
-extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
-extern void vfs_freeze(bhv_desc_t *);
-
-typedef struct bhv_vfsops {
- struct vfsops bhv_common;
- void * bhv_custom;
-} bhv_vfsops_t;
-
-#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) )
-#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom )
-#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o))
-#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL )
-
-extern vfs_t *vfs_allocate(void);
-extern void vfs_deallocate(vfs_t *);
-extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
-extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
-
-extern void bhv_insert_all_vfsops(struct vfs *);
-extern void bhv_remove_all_vfsops(struct vfs *, int);
-extern void bhv_remove_vfsops(struct vfs *, int);
-
-#define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen)
-#define fs_check_frozen(vfsp, level) \
- vfs_check_frozen(vfsp->vfs_super, level);
-
-#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
deleted file mode 100644
index 260dd8415dd..00000000000
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-
-uint64_t vn_generation; /* vnode generation number */
-DEFINE_SPINLOCK(vnumber_lock);
-
-/*
- * Dedicated vnode inactive/reclaim sync semaphores.
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC 37
-#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
-STATIC wait_queue_head_t vsync[NVSYNC];
-
-void
-vn_init(void)
-{
- int i;
-
- for (i = 0; i < NVSYNC; i++)
- init_waitqueue_head(&vsync[i]);
-}
-
-void
-vn_iowait(
- struct vnode *vp)
-{
- wait_queue_head_t *wq = vptosync(vp);
-
- wait_event(*wq, (atomic_read(&vp->v_iocount) == 0));
-}
-
-void
-vn_iowake(
- struct vnode *vp)
-{
- if (atomic_dec_and_test(&vp->v_iocount))
- wake_up(vptosync(vp));
-}
-
-struct vnode *
-vn_initialize(
- struct inode *inode)
-{
- struct vnode *vp = LINVFS_GET_VP(inode);
-
- XFS_STATS_INC(vn_active);
- XFS_STATS_INC(vn_alloc);
-
- vp->v_flag = VMODIFIED;
- spinlock_init(&vp->v_lock, "v_lock");
-
- spin_lock(&vnumber_lock);
- if (!++vn_generation) /* v_number shouldn't be zero */
- vn_generation++;
- vp->v_number = vn_generation;
- spin_unlock(&vnumber_lock);
-
- ASSERT(VN_CACHED(vp) == 0);
-
- /* Initialize the first behavior and the behavior chain head. */
- vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
-
- atomic_set(&vp->v_iocount, 0);
-
-#ifdef XFS_VNODE_TRACE
- vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
-#endif /* XFS_VNODE_TRACE */
-
- vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address);
- return vp;
-}
-
-/*
- * Revalidate the Linux inode from the vattr.
- * Note: i_size _not_ updated; we must hold the inode
- * semaphore when doing that - callers responsibility.
- */
-void
-vn_revalidate_core(
- struct vnode *vp,
- vattr_t *vap)
-{
- struct inode *inode = LINVFS_GET_IP(vp);
-
- inode->i_mode = vap->va_mode;
- inode->i_nlink = vap->va_nlink;
- inode->i_uid = vap->va_uid;
- inode->i_gid = vap->va_gid;
- inode->i_blocks = vap->va_nblocks;
- inode->i_mtime = vap->va_mtime;
- inode->i_ctime = vap->va_ctime;
- inode->i_blksize = vap->va_blocksize;
- if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (vap->va_xflags & XFS_XFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (vap->va_xflags & XFS_XFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (vap->va_xflags & XFS_XFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
-}
-
-/*
- * Revalidate the Linux inode from the vnode.
- */
-int
-vn_revalidate(
- struct vnode *vp)
-{
- vattr_t va;
- int error;
-
- vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
- ASSERT(vp->v_fbhv != NULL);
-
- va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS;
- VOP_GETATTR(vp, &va, 0, NULL, error);
- if (!error) {
- vn_revalidate_core(vp, &va);
- VUNMODIFY(vp);
- }
- return -error;
-}
-
-/*
- * Add a reference to a referenced vnode.
- */
-struct vnode *
-vn_hold(
- struct vnode *vp)
-{
- struct inode *inode;
-
- XFS_STATS_INC(vn_hold);
-
- VN_LOCK(vp);
- inode = igrab(LINVFS_GET_IP(vp));
- ASSERT(inode);
- VN_UNLOCK(vp, 0);
-
- return vp;
-}
-
-#ifdef XFS_VNODE_TRACE
-
-#define KTRACE_ENTER(vp, vk, s, line, ra) \
- ktrace_enter( (vp)->v_trace, \
-/* 0 */ (void *)(__psint_t)(vk), \
-/* 1 */ (void *)(s), \
-/* 2 */ (void *)(__psint_t) line, \
-/* 3 */ (void *)(__psint_t)(vn_count(vp)), \
-/* 4 */ (void *)(ra), \
-/* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \
-/* 6 */ (void *)(__psint_t)current_cpu(), \
-/* 7 */ (void *)(__psint_t)current_pid(), \
-/* 8 */ (void *)__return_address, \
-/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-
-/*
- * Vnode tracing code.
- */
-void
-vn_trace_entry(vnode_t *vp, const char *func, inst_t *ra)
-{
- KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
-}
-
-void
-vn_trace_exit(vnode_t *vp, const char *func, inst_t *ra)
-{
- KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
-}
-
-void
-vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra)
-{
- KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra);
-}
-
-void
-vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra)
-{
- KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra);
-}
-
-void
-vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra)
-{
- KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra);
-}
-#endif /* XFS_VNODE_TRACE */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
deleted file mode 100644
index 0fe2419461d..00000000000
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ /dev/null
@@ -1,644 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Portions Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-struct uio;
-struct file;
-struct vattr;
-struct xfs_iomap;
-struct attrlist_cursor_kern;
-
-
-typedef xfs_ino_t vnumber_t;
-typedef struct dentry vname_t;
-typedef bhv_head_t vn_bhv_head_t;
-
-/*
- * MP locking protocols:
- * v_flag, v_vfsp VN_LOCK/VN_UNLOCK
- */
-typedef struct vnode {
- __u32 v_flag; /* vnode flags (see below) */
- struct vfs *v_vfsp; /* ptr to containing VFS */
- vnumber_t v_number; /* in-core vnode number */
- vn_bhv_head_t v_bh; /* behavior head */
- spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */
- atomic_t v_iocount; /* outstanding I/O count */
-#ifdef XFS_VNODE_TRACE
- struct ktrace *v_trace; /* trace header structure */
-#endif
- struct inode v_inode; /* Linux inode */
- /* inode MUST be last */
-} vnode_t;
-
-#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode)
-#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode)
-#define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode)
-#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode)
-#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode)
-
-#define v_fbhv v_bh.bh_first /* first behavior */
-#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */
-
-#define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */
-#define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */
-#define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */
-
-typedef enum {
- VN_BHV_UNKNOWN, /* not specified */
- VN_BHV_XFS, /* xfs */
- VN_BHV_DM, /* data migration */
- VN_BHV_QM, /* quota manager */
- VN_BHV_IO, /* IO path */
- VN_BHV_END /* housekeeping end-of-range */
-} vn_bhv_t;
-
-#define VNODE_POSITION_XFS (VNODE_POSITION_BASE)
-#define VNODE_POSITION_DM (VNODE_POSITION_BASE+10)
-#define VNODE_POSITION_QM (VNODE_POSITION_BASE+20)
-#define VNODE_POSITION_IO (VNODE_POSITION_BASE+30)
-
-/*
- * Macros for dealing with the behavior descriptor inside of the vnode.
- */
-#define BHV_TO_VNODE(bdp) ((vnode_t *)BHV_VOBJ(bdp))
-#define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp))
-
-#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh)))
-#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name)
-#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp)
-#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops)
-#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops)
-
-/*
- * Vnode to Linux inode mapping.
- */
-#define LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode))
-#define LINVFS_GET_IP(vp) (&(vp)->v_inode)
-
-/*
- * Vnode flags.
- */
-#define VMODIFIED 0x8 /* XFS inode state possibly differs */
- /* to the Linux inode state. */
-
-/*
- * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
- */
-typedef enum vrwlock {
- VRWLOCK_NONE,
- VRWLOCK_READ,
- VRWLOCK_WRITE,
- VRWLOCK_WRITE_DIRECT,
- VRWLOCK_TRY_READ,
- VRWLOCK_TRY_WRITE
-} vrwlock_t;
-
-/*
- * Return values for VOP_INACTIVE. A return value of
- * VN_INACTIVE_NOCACHE implies that the file system behavior
- * has disassociated its state and bhv_desc_t from the vnode.
- */
-#define VN_INACTIVE_CACHE 0
-#define VN_INACTIVE_NOCACHE 1
-
-/*
- * Values for the cmd code given to VOP_VNODE_CHANGE.
- */
-typedef enum vchange {
- VCHANGE_FLAGS_FRLOCKS = 0,
- VCHANGE_FLAGS_ENF_LOCKING = 1,
- VCHANGE_FLAGS_TRUNCATED = 2,
- VCHANGE_FLAGS_PAGE_DIRTY = 3,
- VCHANGE_FLAGS_IOEXCL_COUNT = 4
-} vchange_t;
-
-
-typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
-typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
- const struct iovec *, unsigned int,
- loff_t *, int, struct cred *);
-typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
- const struct iovec *, unsigned int,
- loff_t *, int, struct cred *);
-typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
- loff_t *, int, size_t, read_actor_t,
- void *, struct cred *);
-typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
- int, unsigned int, void __user *);
-typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
- struct cred *);
-typedef int (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int,
- struct cred *);
-typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *);
-typedef int (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **,
- int, vnode_t *, struct cred *);
-typedef int (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *,
- vnode_t **, struct cred *);
-typedef int (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *);
-typedef int (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *,
- struct cred *);
-typedef int (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *,
- struct cred *);
-typedef int (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *,
- vnode_t **, struct cred *);
-typedef int (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *);
-typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *,
- int *);
-typedef int (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *,
- char *, vnode_t **, struct cred *);
-typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int,
- struct cred *);
-typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
- xfs_off_t, xfs_off_t);
-typedef int (*vop_inactive_t)(bhv_desc_t *, struct cred *);
-typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *);
-typedef int (*vop_release_t)(bhv_desc_t *);
-typedef int (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
-typedef void (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
-typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
- struct xfs_iomap *, int *);
-typedef int (*vop_reclaim_t)(bhv_desc_t *);
-typedef int (*vop_attr_get_t)(bhv_desc_t *, const char *, char *, int *,
- int, struct cred *);
-typedef int (*vop_attr_set_t)(bhv_desc_t *, const char *, char *, int,
- int, struct cred *);
-typedef int (*vop_attr_remove_t)(bhv_desc_t *, const char *,
- int, struct cred *);
-typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
- struct attrlist_cursor_kern *, struct cred *);
-typedef void (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int);
-typedef void (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t);
-typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
-typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
-typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
- uint64_t, int);
-typedef int (*vop_iflush_t)(bhv_desc_t *, int);
-
-
-typedef struct vnodeops {
- bhv_position_t vn_position; /* position within behavior chain */
- vop_open_t vop_open;
- vop_read_t vop_read;
- vop_write_t vop_write;
- vop_sendfile_t vop_sendfile;
- vop_ioctl_t vop_ioctl;
- vop_getattr_t vop_getattr;
- vop_setattr_t vop_setattr;
- vop_access_t vop_access;
- vop_lookup_t vop_lookup;
- vop_create_t vop_create;
- vop_remove_t vop_remove;
- vop_link_t vop_link;
- vop_rename_t vop_rename;
- vop_mkdir_t vop_mkdir;
- vop_rmdir_t vop_rmdir;
- vop_readdir_t vop_readdir;
- vop_symlink_t vop_symlink;
- vop_readlink_t vop_readlink;
- vop_fsync_t vop_fsync;
- vop_inactive_t vop_inactive;
- vop_fid2_t vop_fid2;
- vop_rwlock_t vop_rwlock;
- vop_rwunlock_t vop_rwunlock;
- vop_bmap_t vop_bmap;
- vop_reclaim_t vop_reclaim;
- vop_attr_get_t vop_attr_get;
- vop_attr_set_t vop_attr_set;
- vop_attr_remove_t vop_attr_remove;
- vop_attr_list_t vop_attr_list;
- vop_link_removed_t vop_link_removed;
- vop_vnode_change_t vop_vnode_change;
- vop_ptossvp_t vop_tosspages;
- vop_pflushinvalvp_t vop_flushinval_pages;
- vop_pflushvp_t vop_flush_pages;
- vop_release_t vop_release;
- vop_iflush_t vop_iflush;
-} vnodeops_t;
-
-/*
- * VOP's.
- */
-#define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op)
-
-#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \
- rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
-#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \
- rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
-#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \
- rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
-#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \
- rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
-#define VOP_OPEN(vp, cr, rv) \
- rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
-#define VOP_GETATTR(vp, vap, f, cr, rv) \
- rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
-#define VOP_SETATTR(vp, vap, f, cr, rv) \
- rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
-#define VOP_ACCESS(vp, mode, cr, rv) \
- rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
-#define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv) \
- rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
-#define VOP_CREATE(dvp,d,vap,vpp,cr,rv) \
- rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
-#define VOP_REMOVE(dvp,d,cr,rv) \
- rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
-#define VOP_LINK(tdvp,fvp,d,cr,rv) \
- rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
-#define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv) \
- rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
-#define VOP_MKDIR(dp,d,vap,vpp,cr,rv) \
- rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
-#define VOP_RMDIR(dp,d,cr,rv) \
- rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
-#define VOP_READDIR(vp,uiop,cr,eofp,rv) \
- rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
-#define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv) \
- rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
-#define VOP_READLINK(vp,uiop,fl,cr,rv) \
- rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr)
-#define VOP_FSYNC(vp,f,cr,b,e,rv) \
- rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
-#define VOP_INACTIVE(vp, cr, rv) \
- rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
-#define VOP_RELEASE(vp, rv) \
- rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
-#define VOP_FID2(vp, fidp, rv) \
- rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
-#define VOP_RWLOCK(vp,i) \
- (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
-#define VOP_RWLOCK_TRY(vp,i) \
- _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
-#define VOP_RWUNLOCK(vp,i) \
- (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
-#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv) \
- rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
-#define VOP_RECLAIM(vp, rv) \
- rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
-#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv) \
- rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
-#define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv) \
- rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
-#define VOP_ATTR_REMOVE(vp, name, flags, cred, rv) \
- rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
-#define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv) \
- rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
-#define VOP_LINK_REMOVED(vp, dvp, linkzero) \
- (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
-#define VOP_VNODE_CHANGE(vp, cmd, val) \
- (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
-/*
- * These are page cache functions that now go thru VOPs.
- * 'last' parameter is unused and left in for IRIX compatibility
- */
-#define VOP_TOSS_PAGES(vp, first, last, fiopt) \
- _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt)
-/*
- * 'last' parameter is unused and left in for IRIX compatibility
- */
-#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt) \
- _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt)
-/*
- * 'last' parameter is unused and left in for IRIX compatibility
- */
-#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv) \
- rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt)
-#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv) \
- rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg)
-#define VOP_IFLUSH(vp, flags, rv) \
- rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags)
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISAIO 0x00001 /* don't wait for completion */
-#define IO_ISDIRECT 0x00004 /* bypass page cache */
-#define IO_INVIS 0x00020 /* don't update inode timestamps */
-
-/*
- * Flags for VOP_IFLUSH call
- */
-#define FLUSH_SYNC 1 /* wait for flush to complete */
-#define FLUSH_INODE 2 /* flush the inode itself */
-#define FLUSH_LOG 4 /* force the last log entry for
- * this inode out to disk */
-
-/*
- * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and
- * VOP_FLUSH_PAGES.
- */
-#define FI_NONE 0 /* none */
-#define FI_REMAPF 1 /* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation.
- Prevent VM access to the pages until
- the operation completes. */
-
-/*
- * Vnode attributes. va_mask indicates those attributes the caller
- * wants to set or extract.
- */
-typedef struct vattr {
- int va_mask; /* bit-mask of attributes present */
- mode_t va_mode; /* file access mode and type */
- xfs_nlink_t va_nlink; /* number of references to file */
- uid_t va_uid; /* owner user id */
- gid_t va_gid; /* owner group id */
- xfs_ino_t va_nodeid; /* file id */
- xfs_off_t va_size; /* file size in bytes */
- u_long va_blocksize; /* blocksize preferred for i/o */
- struct timespec va_atime; /* time of last access */
- struct timespec va_mtime; /* time of last modification */
- struct timespec va_ctime; /* time file changed */
- u_int va_gen; /* generation number of file */
- xfs_dev_t va_rdev; /* device the special file represents */
- __int64_t va_nblocks; /* number of blocks allocated */
- u_long va_xflags; /* random extended file flags */
- u_long va_extsize; /* file extent size */
- u_long va_nextents; /* number of extents in file */
- u_long va_anextents; /* number of attr extents in file */
- prid_t va_projid; /* project id */
-} vattr_t;
-
-/*
- * setattr or getattr attributes
- */
-#define XFS_AT_TYPE 0x00000001
-#define XFS_AT_MODE 0x00000002
-#define XFS_AT_UID 0x00000004
-#define XFS_AT_GID 0x00000008
-#define XFS_AT_FSID 0x00000010
-#define XFS_AT_NODEID 0x00000020
-#define XFS_AT_NLINK 0x00000040
-#define XFS_AT_SIZE 0x00000080
-#define XFS_AT_ATIME 0x00000100
-#define XFS_AT_MTIME 0x00000200
-#define XFS_AT_CTIME 0x00000400
-#define XFS_AT_RDEV 0x00000800
-#define XFS_AT_BLKSIZE 0x00001000
-#define XFS_AT_NBLOCKS 0x00002000
-#define XFS_AT_VCODE 0x00004000
-#define XFS_AT_MAC 0x00008000
-#define XFS_AT_UPDATIME 0x00010000
-#define XFS_AT_UPDMTIME 0x00020000
-#define XFS_AT_UPDCTIME 0x00040000
-#define XFS_AT_ACL 0x00080000
-#define XFS_AT_CAP 0x00100000
-#define XFS_AT_INF 0x00200000
-#define XFS_AT_XFLAGS 0x00400000
-#define XFS_AT_EXTSIZE 0x00800000
-#define XFS_AT_NEXTENTS 0x01000000
-#define XFS_AT_ANEXTENTS 0x02000000
-#define XFS_AT_PROJID 0x04000000
-#define XFS_AT_SIZE_NOPERM 0x08000000
-#define XFS_AT_GENCOUNT 0x10000000
-
-#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
- XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
- XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
- XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
- XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
- XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
-
-#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
- XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
- XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
- XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
-
-#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
-
-#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
-
-#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
- XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
- XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
-
-/*
- * Modes.
- */
-#define VSUID S_ISUID /* set user id on execution */
-#define VSGID S_ISGID /* set group id on execution */
-#define VSVTX S_ISVTX /* save swapped text even after use */
-#define VREAD S_IRUSR /* read, write, execute permissions */
-#define VWRITE S_IWUSR
-#define VEXEC S_IXUSR
-
-#define MODEMASK S_IALLUGO /* mode bits plus permission bits */
-
-/*
- * Check whether mandatory file locking is enabled.
- */
-#define MANDLOCK(vp, mode) \
- (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
-
-extern void vn_init(void);
-extern vnode_t *vn_initialize(struct inode *);
-
-/*
- * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
- */
-typedef struct vnode_map {
- vfs_t *v_vfsp;
- vnumber_t v_number; /* in-core vnode number */
- xfs_ino_t v_ino; /* inode # */
-} vmap_t;
-
-#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp, \
- (vmap).v_number = (vp)->v_number, \
- (vmap).v_ino = (vp)->v_inode.i_ino; }
-
-extern int vn_revalidate(struct vnode *);
-extern void vn_revalidate_core(struct vnode *, vattr_t *);
-
-extern void vn_iowait(struct vnode *vp);
-extern void vn_iowake(struct vnode *vp);
-
-static inline int vn_count(struct vnode *vp)
-{
- return atomic_read(&LINVFS_GET_IP(vp)->i_count);
-}
-
-/*
- * Vnode reference counting functions (and macros for compatibility).
- */
-extern vnode_t *vn_hold(struct vnode *);
-
-#if defined(XFS_VNODE_TRACE)
-#define VN_HOLD(vp) \
- ((void)vn_hold(vp), \
- vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address))
-#define VN_RELE(vp) \
- (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \
- iput(LINVFS_GET_IP(vp)))
-#else
-#define VN_HOLD(vp) ((void)vn_hold(vp))
-#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp)))
-#endif
-
-static inline struct vnode *vn_grab(struct vnode *vp)
-{
- struct inode *inode = igrab(LINVFS_GET_IP(vp));
- return inode ? LINVFS_GET_VP(inode) : NULL;
-}
-
-/*
- * Vname handling macros.
- */
-#define VNAME(dentry) ((char *) (dentry)->d_name.name)
-#define VNAMELEN(dentry) ((dentry)->d_name.len)
-#define VNAME_TO_VNODE(dentry) (LINVFS_GET_VP((dentry)->d_inode))
-
-/*
- * Vnode spinlock manipulation.
- */
-#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock)
-#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s)
-#define VN_FLAGSET(vp,b) vn_flagset(vp,b)
-#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b)
-
-static __inline__ void vn_flagset(struct vnode *vp, uint flag)
-{
- spin_lock(&vp->v_lock);
- vp->v_flag |= flag;
- spin_unlock(&vp->v_lock);
-}
-
-static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
-{
- spin_lock(&vp->v_lock);
- vp->v_flag &= ~flag;
- spin_unlock(&vp->v_lock);
-}
-
-/*
- * Dealing with bad inodes
- */
-static inline void vn_mark_bad(struct vnode *vp)
-{
- make_bad_inode(LINVFS_GET_IP(vp));
-}
-
-static inline int VN_BAD(struct vnode *vp)
-{
- return is_bad_inode(LINVFS_GET_IP(vp));
-}
-
-/*
- * Extracting atime values in various formats
- */
-static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
-{
- bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
- bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
-}
-
-static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
-{
- *ts = vp->v_inode.i_atime;
-}
-
-static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
-{
- *tt = vp->v_inode.i_atime.tv_sec;
-}
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
-#define VN_CACHED(vp) (LINVFS_GET_IP(vp)->i_mapping->nrpages)
-#define VN_DIRTY(vp) mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \
- PAGECACHE_TAG_DIRTY)
-#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED)
-#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED)
-
-/*
- * Flags to VOP_SETATTR/VOP_GETATTR.
- */
-#define ATTR_UTIME 0x01 /* non-default utime(2) request */
-#define ATTR_DMI 0x08 /* invocation from a DMI function */
-#define ATTR_LAZY 0x80 /* set/get attributes lazily */
-#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
-#define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */
-#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
-
-/*
- * Flags to VOP_FSYNC and VOP_RECLAIM.
- */
-#define FSYNC_NOWAIT 0 /* asynchronous flush */
-#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */
-#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */
-#define FSYNC_DATA 0x4 /* synchronous fsync of data only */
-
-/*
- * Tracking vnode activity.
- */
-#if defined(XFS_VNODE_TRACE)
-
-#define VNODE_TRACE_SIZE 16 /* number of trace entries */
-#define VNODE_KTRACE_ENTRY 1
-#define VNODE_KTRACE_EXIT 2
-#define VNODE_KTRACE_HOLD 3
-#define VNODE_KTRACE_REF 4
-#define VNODE_KTRACE_RELE 5
-
-extern void vn_trace_entry(struct vnode *, const char *, inst_t *);
-extern void vn_trace_exit(struct vnode *, const char *, inst_t *);
-extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
-extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
-extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);
-
-#define VN_TRACE(vp) \
- vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
-#else
-#define vn_trace_entry(a,b,c)
-#define vn_trace_exit(a,b,c)
-#define vn_trace_hold(a,b,c,d)
-#define vn_trace_ref(a,b,c,d)
-#define vn_trace_rele(a,b,c,d)
-#define VN_TRACE(vp)
-#endif
-
-#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/mrlock.h
index 16b44c3c236..e3c92d19e54 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/mrlock.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
@@ -20,29 +20,35 @@
#include <linux/rwsem.h>
-enum { MR_NONE, MR_ACCESS, MR_UPDATE };
-
typedef struct {
struct rw_semaphore mr_lock;
+#if defined(DEBUG) || defined(XFS_WARN)
int mr_writer;
+#endif
} mrlock_t;
+#if defined(DEBUG) || defined(XFS_WARN)
+#define mrinit(mrp, name) \
+ do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
+#else
#define mrinit(mrp, name) \
- ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) )
+ do { init_rwsem(&(mrp)->mr_lock); } while (0)
+#endif
+
#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
#define mrfree(mrp) do { } while (0)
-#define mraccess(mrp) mraccessf(mrp, 0)
-#define mrupdate(mrp) mrupdatef(mrp, 0)
-static inline void mraccessf(mrlock_t *mrp, int flags)
+static inline void mraccess_nested(mrlock_t *mrp, int subclass)
{
- down_read(&mrp->mr_lock);
+ down_read_nested(&mrp->mr_lock, subclass);
}
-static inline void mrupdatef(mrlock_t *mrp, int flags)
+static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
{
- down_write(&mrp->mr_lock);
+ down_write_nested(&mrp->mr_lock, subclass);
+#if defined(DEBUG) || defined(XFS_WARN)
mrp->mr_writer = 1;
+#endif
}
static inline int mrtryaccess(mrlock_t *mrp)
@@ -54,39 +60,31 @@ static inline int mrtryupdate(mrlock_t *mrp)
{
if (!down_write_trylock(&mrp->mr_lock))
return 0;
+#if defined(DEBUG) || defined(XFS_WARN)
mrp->mr_writer = 1;
+#endif
return 1;
}
-static inline void mrunlock(mrlock_t *mrp)
+static inline void mrunlock_excl(mrlock_t *mrp)
{
- if (mrp->mr_writer) {
- mrp->mr_writer = 0;
- up_write(&mrp->mr_lock);
- } else {
- up_read(&mrp->mr_lock);
- }
+#if defined(DEBUG) || defined(XFS_WARN)
+ mrp->mr_writer = 0;
+#endif
+ up_write(&mrp->mr_lock);
}
-static inline void mrdemote(mrlock_t *mrp)
+static inline void mrunlock_shared(mrlock_t *mrp)
{
- mrp->mr_writer = 0;
- downgrade_write(&mrp->mr_lock);
+ up_read(&mrp->mr_lock);
}
-#ifdef DEBUG
-/*
- * Debug-only routine, without some platform-specific asm code, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- * Note: means !ismrlocked would give false positivies, so don't do that.
- */
-static inline int ismrlocked(mrlock_t *mrp, int type)
+static inline void mrdemote(mrlock_t *mrp)
{
- if (mrp && type == MR_UPDATE)
- return mrp->mr_writer;
- return 1;
-}
+#if defined(DEBUG) || defined(XFS_WARN)
+ mrp->mr_writer = 0;
#endif
+ downgrade_write(&mrp->mr_lock);
+}
#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
deleted file mode 100644
index 772ac48329e..00000000000
--- a/fs/xfs/quota/xfs_dquot.c
+++ /dev/null
@@ -1,1601 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-
-/*
- LOCK ORDER
-
- inode lock (ilock)
- dquot hash-chain lock (hashlock)
- xqm dquot freelist lock (freelistlock
- mount's dquot list lock (mplistlock)
- user dquot lock - lock ordering among dquots is based on the uid or gid
- group dquot lock - similar to udquots. Between the two dquots, the udquot
- has to be locked first.
- pin lock - the dquot lock must be held to take this lock.
- flush lock - ditto.
-*/
-
-STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
-
-#ifdef DEBUG
-xfs_buftarg_t *xfs_dqerror_target;
-int xfs_do_dqerror;
-int xfs_dqreq_num;
-int xfs_dqerror_mod = 33;
-#endif
-
-/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type)
-{
- xfs_dquot_t *dqp;
- boolean_t brandnewdquot;
-
- brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
- dqp->dq_flags = type;
- dqp->q_core.d_id = cpu_to_be32(id);
- dqp->q_mount = mp;
-
- /*
- * No need to re-initialize these if this is a reclaimed dquot.
- */
- if (brandnewdquot) {
- dqp->dq_flnext = dqp->dq_flprev = dqp;
- mutex_init(&dqp->q_qlock);
- initnsema(&dqp->q_flock, 1, "fdq");
- sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
-
-#ifdef XFS_DQUOT_TRACE
- dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
- xfs_dqtrace_entry(dqp, "DQINIT");
-#endif
- } else {
- /*
- * Only the q_core portion was zeroed in dqreclaim_one().
- * So, we need to reset others.
- */
- dqp->q_nrefs = 0;
- dqp->q_blkno = 0;
- dqp->MPL_NEXT = dqp->HL_NEXT = NULL;
- dqp->HL_PREVP = dqp->MPL_PREVP = NULL;
- dqp->q_bufoffset = 0;
- dqp->q_fileoffset = 0;
- dqp->q_transp = NULL;
- dqp->q_gdquot = NULL;
- dqp->q_res_bcount = 0;
- dqp->q_res_icount = 0;
- dqp->q_res_rtbcount = 0;
- dqp->q_pincount = 0;
- dqp->q_hash = NULL;
- ASSERT(dqp->dq_flnext == dqp->dq_flprev);
-
-#ifdef XFS_DQUOT_TRACE
- ASSERT(dqp->q_trace);
- xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
-#endif
- }
-
- /*
- * log item gets initialized later
- */
- return (dqp);
-}
-
-/*
- * This is called to free all the memory associated with a dquot
- */
-void
-xfs_qm_dqdestroy(
- xfs_dquot_t *dqp)
-{
- ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
-
- mutex_destroy(&dqp->q_qlock);
- freesema(&dqp->q_flock);
- sv_destroy(&dqp->q_pinwait);
-
-#ifdef XFS_DQUOT_TRACE
- if (dqp->q_trace)
- ktrace_free(dqp->q_trace);
- dqp->q_trace = NULL;
-#endif
- kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
- atomic_dec(&xfs_Gqm->qm_totaldquots);
-}
-
-/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
- xfs_dqid_t id,
- uint type,
- xfs_dqblk_t *d)
-{
- /*
- * Caller has zero'd the entire dquot 'chunk' already.
- */
- d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
- d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
- d->dd_diskdq.d_id = cpu_to_be32(id);
- d->dd_diskdq.d_flags = type;
-}
-
-
-#ifdef XFS_DQUOT_TRACE
-/*
- * Dquot tracing for debugging.
- */
-/* ARGSUSED */
-void
-__xfs_dqtrace_entry(
- xfs_dquot_t *dqp,
- char *func,
- void *retaddr,
- xfs_inode_t *ip)
-{
- xfs_dquot_t *udqp = NULL;
- xfs_ino_t ino = 0;
-
- ASSERT(dqp->q_trace);
- if (ip) {
- ino = ip->i_ino;
- udqp = ip->i_udquot;
- }
- ktrace_enter(dqp->q_trace,
- (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
- (void *)func,
- (void *)(__psint_t)dqp->q_nrefs,
- (void *)(__psint_t)dqp->dq_flags,
- (void *)(__psint_t)dqp->q_res_bcount,
- (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount),
- (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount),
- (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit),
- (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit),
- (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit),
- (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit),
- (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id),
- (void *)(__psint_t)current_pid(),
- (void *)(__psint_t)ino,
- (void *)(__psint_t)retaddr,
- (void *)(__psint_t)udqp);
- return;
-}
-#endif
-
-
-/*
- * If default limits are in force, push them into the dquot now.
- * We overwrite the dquot limits only if they are zero and this
- * is not the root dquot.
- */
-void
-xfs_qm_adjust_dqlimits(
- xfs_mount_t *mp,
- xfs_disk_dquot_t *d)
-{
- xfs_quotainfo_t *q = mp->m_quotainfo;
-
- ASSERT(d->d_id);
-
- if (q->qi_bsoftlimit && !d->d_blk_softlimit)
- d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
- if (q->qi_bhardlimit && !d->d_blk_hardlimit)
- d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
- if (q->qi_isoftlimit && !d->d_ino_softlimit)
- d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
- if (q->qi_ihardlimit && !d->d_ino_hardlimit)
- d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
- if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
- d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
- if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
- d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
-}
-
-/*
- * Check the limits and timers of a dquot and start or reset timers
- * if necessary.
- * This gets called even when quota enforcement is OFF, which makes our
- * life a little less complicated. (We just don't reject any quota
- * reservations in that case, when enforcement is off).
- * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
- * enforcement's off.
- * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded. They do
- * get reset to zero, however, when we find the count to be under
- * the soft limit (they are only ever set non-zero via userspace).
- */
-void
-xfs_qm_adjust_dqtimers(
- xfs_mount_t *mp,
- xfs_disk_dquot_t *d)
-{
- ASSERT(d->d_id);
-
-#ifdef QUOTADEBUG
- if (d->d_blk_hardlimit)
- ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
- be64_to_cpu(d->d_blk_hardlimit));
- if (d->d_ino_hardlimit)
- ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
- be64_to_cpu(d->d_ino_hardlimit));
- if (d->d_rtb_hardlimit)
- ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
- be64_to_cpu(d->d_rtb_hardlimit));
-#endif
- if (!d->d_btimer) {
- if ((d->d_blk_softlimit &&
- (be64_to_cpu(d->d_bcount) >=
- be64_to_cpu(d->d_blk_softlimit))) ||
- (d->d_blk_hardlimit &&
- (be64_to_cpu(d->d_bcount) >=
- be64_to_cpu(d->d_blk_hardlimit)))) {
- d->d_btimer = cpu_to_be32(get_seconds() +
- XFS_QI_BTIMELIMIT(mp));
- } else {
- d->d_bwarns = 0;
- }
- } else {
- if ((!d->d_blk_softlimit ||
- (be64_to_cpu(d->d_bcount) <
- be64_to_cpu(d->d_blk_softlimit))) &&
- (!d->d_blk_hardlimit ||
- (be64_to_cpu(d->d_bcount) <
- be64_to_cpu(d->d_blk_hardlimit)))) {
- d->d_btimer = 0;
- }
- }
-
- if (!d->d_itimer) {
- if ((d->d_ino_softlimit &&
- (be64_to_cpu(d->d_icount) >=
- be64_to_cpu(d->d_ino_softlimit))) ||
- (d->d_ino_hardlimit &&
- (be64_to_cpu(d->d_icount) >=
- be64_to_cpu(d->d_ino_hardlimit)))) {
- d->d_itimer = cpu_to_be32(get_seconds() +
- XFS_QI_ITIMELIMIT(mp));
- } else {
- d->d_iwarns = 0;
- }
- } else {
- if ((!d->d_ino_softlimit ||
- (be64_to_cpu(d->d_icount) <
- be64_to_cpu(d->d_ino_softlimit))) &&
- (!d->d_ino_hardlimit ||
- (be64_to_cpu(d->d_icount) <
- be64_to_cpu(d->d_ino_hardlimit)))) {
- d->d_itimer = 0;
- }
- }
-
- if (!d->d_rtbtimer) {
- if ((d->d_rtb_softlimit &&
- (be64_to_cpu(d->d_rtbcount) >=
- be64_to_cpu(d->d_rtb_softlimit))) ||
- (d->d_rtb_hardlimit &&
- (be64_to_cpu(d->d_rtbcount) >=
- be64_to_cpu(d->d_rtb_hardlimit)))) {
- d->d_rtbtimer = cpu_to_be32(get_seconds() +
- XFS_QI_RTBTIMELIMIT(mp));
- } else {
- d->d_rtbwarns = 0;
- }
- } else {
- if ((!d->d_rtb_softlimit ||
- (be64_to_cpu(d->d_rtbcount) <
- be64_to_cpu(d->d_rtb_softlimit))) &&
- (!d->d_rtb_hardlimit ||
- (be64_to_cpu(d->d_rtbcount) <
- be64_to_cpu(d->d_rtb_hardlimit)))) {
- d->d_rtbtimer = 0;
- }
- }
-}
-
-/*
- * initialize a buffer full of dquots and log the whole thing
- */
-STATIC void
-xfs_qm_init_dquot_blk(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- xfs_buf_t *bp)
-{
- xfs_dqblk_t *d;
- int curid, i;
-
- ASSERT(tp);
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
-
- d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
-
- /*
- * ID of the first dquot in the block - id's are zero based.
- */
- curid = id - (id % XFS_QM_DQPERBLK(mp));
- ASSERT(curid >= 0);
- memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
- for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
- xfs_qm_dqinit_core(curid, type, d);
- xfs_trans_dquot_buf(tp, bp,
- (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
- ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
- XFS_BLI_GDQUOT_BUF)));
- xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
-}
-
-
-
-/*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
- */
-STATIC int
-xfs_qm_dqalloc(
- xfs_trans_t **tpp,
- xfs_mount_t *mp,
- xfs_dquot_t *dqp,
- xfs_inode_t *quotip,
- xfs_fileoff_t offset_fsb,
- xfs_buf_t **O_bpp)
-{
- xfs_fsblock_t firstblock;
- xfs_bmap_free_t flist;
- xfs_bmbt_irec_t map;
- int nmaps, error, committed;
- xfs_buf_t *bp;
- xfs_trans_t *tp = *tpp;
-
- ASSERT(tp != NULL);
- xfs_dqtrace_entry(dqp, "DQALLOC");
-
- /*
- * Initialize the bmap freelist prior to calling bmapi code.
- */
- XFS_BMAP_INIT(&flist, &firstblock);
- xfs_ilock(quotip, XFS_ILOCK_EXCL);
- /*
- * Return if this type of quotas is turned off while we didn't
- * have an inode lock
- */
- if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
- xfs_iunlock(quotip, XFS_ILOCK_EXCL);
- return (ESRCH);
- }
-
- /*
- * xfs_trans_commit normally decrements the vnode ref count
- * when it unlocks the inode. Since we want to keep the quota
- * inode around, we bump the vnode ref count now.
- */
- VN_HOLD(XFS_ITOV(quotip));
-
- xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
- nmaps = 1;
- if ((error = xfs_bmapi(tp, quotip,
- offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
- XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
- &firstblock,
- XFS_QM_DQALLOC_SPACE_RES(mp),
- &map, &nmaps, &flist))) {
- goto error0;
- }
- ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
- ASSERT(nmaps == 1);
- ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
- (map.br_startblock != HOLESTARTBLOCK));
-
- /*
- * Keep track of the blkno to save a lookup later
- */
- dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
- /* now we can just get the buffer (there's nothing to read yet) */
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
- dqp->q_blkno,
- XFS_QI_DQCHUNKLEN(mp),
- 0);
- if (!bp || (error = XFS_BUF_GETERROR(bp)))
- goto error1;
- /*
- * Make a chunk of dquots out of this buffer and log
- * the entire thing.
- */
- xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
- dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
-
- /*
- * xfs_bmap_finish() may commit the current transaction and
- * start a second transaction if the freelist is not empty.
- *
- * Since we still want to modify this buffer, we need to
- * ensure that the buffer is not released on commit of
- * the first transaction and ensure the buffer is added to the
- * second transaction.
- *
- * If there is only one transaction then don't stop the buffer
- * from being released when it commits later on.
- */
-
- xfs_trans_bhold(tp, bp);
-
- if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
- goto error1;
- }
-
- if (committed) {
- tp = *tpp;
- xfs_trans_bjoin(tp, bp);
- } else {
- xfs_trans_bhold_release(tp, bp);
- }
-
- *O_bpp = bp;
- return 0;
-
- error1:
- xfs_bmap_cancel(&flist);
- error0:
- xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
- return (error);
-}
-
-/*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
- */
-STATIC int
-xfs_qm_dqtobp(
- xfs_trans_t **tpp,
- xfs_dquot_t *dqp,
- xfs_disk_dquot_t **O_ddpp,
- xfs_buf_t **O_bpp,
- uint flags)
-{
- xfs_bmbt_irec_t map;
- int nmaps, error;
- xfs_buf_t *bp;
- xfs_inode_t *quotip;
- xfs_mount_t *mp;
- xfs_disk_dquot_t *ddq;
- xfs_dqid_t id;
- boolean_t newdquot;
- xfs_trans_t *tp = (tpp ? *tpp : NULL);
-
- mp = dqp->q_mount;
- id = be32_to_cpu(dqp->q_core.d_id);
- nmaps = 1;
- newdquot = B_FALSE;
-
- /*
- * If we don't know where the dquot lives, find out.
- */
- if (dqp->q_blkno == (xfs_daddr_t) 0) {
- /* We use the id as an index */
- dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp);
- nmaps = 1;
- quotip = XFS_DQ_TO_QIP(dqp);
- xfs_ilock(quotip, XFS_ILOCK_SHARED);
- /*
- * Return if this type of quotas is turned off while we didn't
- * have an inode lock
- */
- if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
- xfs_iunlock(quotip, XFS_ILOCK_SHARED);
- return (ESRCH);
- }
- /*
- * Find the block map; no allocations yet
- */
- error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
- XFS_DQUOT_CLUSTER_SIZE_FSB,
- XFS_BMAPI_METADATA,
- NULL, 0, &map, &nmaps, NULL);
-
- xfs_iunlock(quotip, XFS_ILOCK_SHARED);
- if (error)
- return (error);
- ASSERT(nmaps == 1);
- ASSERT(map.br_blockcount == 1);
-
- /*
- * offset of dquot in the (fixed sized) dquot chunk.
- */
- dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) *
- sizeof(xfs_dqblk_t);
- if (map.br_startblock == HOLESTARTBLOCK) {
- /*
- * We don't allocate unless we're asked to
- */
- if (!(flags & XFS_QMOPT_DQALLOC))
- return (ENOENT);
-
- ASSERT(tp);
- if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
- dqp->q_fileoffset, &bp)))
- return (error);
- tp = *tpp;
- newdquot = B_TRUE;
- } else {
- /*
- * store the blkno etc so that we don't have to do the
- * mapping all the time
- */
- dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
- }
- }
- ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
- ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
-
- /*
- * Read in the buffer, unless we've just done the allocation
- * (in which case we already have the buf).
- */
- if (! newdquot) {
- xfs_dqtrace_entry(dqp, "DQTOBP READBUF");
- if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- dqp->q_blkno,
- XFS_QI_DQCHUNKLEN(mp),
- 0, &bp))) {
- return (error);
- }
- if (error || !bp)
- return XFS_ERROR(error);
- }
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
-
- /*
- * calculate the location of the dquot inside the buffer.
- */
- ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
-
- /*
- * A simple sanity check in case we got a corrupted dquot...
- */
- if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
- flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
- "dqtobp")) {
- if (!(flags & XFS_QMOPT_DQREPAIR)) {
- xfs_trans_brelse(tp, bp);
- return XFS_ERROR(EIO);
- }
- XFS_BUF_BUSY(bp); /* We dirtied this */
- }
-
- *O_bpp = bp;
- *O_ddpp = ddq;
-
- return (0);
-}
-
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- *
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqread(
- xfs_trans_t **tpp,
- xfs_dqid_t id,
- xfs_dquot_t *dqp, /* dquot to get filled in */
- uint flags)
-{
- xfs_disk_dquot_t *ddqp;
- xfs_buf_t *bp;
- int error;
- xfs_trans_t *tp;
-
- ASSERT(tpp);
-
- /*
- * get a pointer to the on-disk dquot and the buffer containing it
- * dqp already knows its own type (GROUP/USER).
- */
- xfs_dqtrace_entry(dqp, "DQREAD");
- if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
- return (error);
- }
- tp = *tpp;
-
- /* copy everything from disk dquot to the incore dquot */
- memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
- ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
- xfs_qm_dquot_logitem_init(dqp);
-
- /*
- * Reservation counters are defined as reservation plus current usage
- * to avoid having to add everytime.
- */
- dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
- dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
- dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
-
- /* Mark the buf so that this will stay incore a little longer */
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
-
- /*
- * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
- * So we need to release with xfs_trans_brelse().
- * The strategy here is identical to that of inodes; we lock
- * the dquot in xfs_qm_dqget() before making it accessible to
- * others. This is because dquots, like inodes, need a good level of
- * concurrency, and we don't want to take locks on the entire buffers
- * for dquot accesses.
- * Note also that the dquot buffer may even be dirty at this point, if
- * this particular dquot was repaired. We still aren't afraid to
- * brelse it because we have the changes incore.
- */
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
- xfs_trans_brelse(tp, bp);
-
- return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
- xfs_mount_t *mp,
- xfs_dqid_t id, /* gid or uid, depending on type */
- uint type, /* UDQUOT or GDQUOT */
- uint flags, /* DQALLOC, DQREPAIR */
- xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */
-{
- xfs_dquot_t *dqp;
- int error;
- xfs_trans_t *tp;
- int cancelflags=0;
-
- dqp = xfs_qm_dqinit(mp, id, type);
- tp = NULL;
- if (flags & XFS_QMOPT_DQALLOC) {
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
- if ((error = xfs_trans_reserve(tp,
- XFS_QM_DQALLOC_SPACE_RES(mp),
- XFS_WRITE_LOG_RES(mp) +
- BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 +
- 128,
- 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_WRITE_LOG_COUNT))) {
- cancelflags = 0;
- goto error0;
- }
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
- }
-
- /*
- * Read it from disk; xfs_dqread() takes care of
- * all the necessary initialization of dquot's fields (locks, etc)
- */
- if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
- /*
- * This can happen if quotas got turned off (ESRCH),
- * or if the dquot didn't exist on disk and we ask to
- * allocate (ENOENT).
- */
- xfs_dqtrace_entry(dqp, "DQREAD FAIL");
- cancelflags |= XFS_TRANS_ABORT;
- goto error0;
- }
- if (tp) {
- if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
- NULL)))
- goto error1;
- }
-
- *O_dqpp = dqp;
- return (0);
-
- error0:
- ASSERT(error);
- if (tp)
- xfs_trans_cancel(tp, cancelflags);
- error1:
- xfs_qm_dqdestroy(dqp);
- *O_dqpp = NULL;
- return (error);
-}
-
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- xfs_dqhash_t *qh,
- xfs_dquot_t **O_dqpp)
-{
- xfs_dquot_t *dqp;
- uint flist_locked;
- xfs_dquot_t *d;
-
- ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
-
- flist_locked = B_FALSE;
-
- /*
- * Traverse the hashchain looking for a match
- */
- for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
- /*
- * We already have the hashlock. We don't need the
- * dqlock to look at the id field of the dquot, since the
- * id can't be modified without the hashlock anyway.
- */
- if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
- xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP");
- /*
- * All in core dquots must be on the dqlist of mp
- */
- ASSERT(dqp->MPL_PREVP != NULL);
-
- xfs_dqlock(dqp);
- if (dqp->q_nrefs == 0) {
- ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
- if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
- xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT");
-
- /*
- * We may have raced with dqreclaim_one()
- * (and lost). So, flag that we don't
- * want the dquot to be reclaimed.
- */
- dqp->dq_flags |= XFS_DQ_WANT;
- xfs_dqunlock(dqp);
- xfs_qm_freelist_lock(xfs_Gqm);
- xfs_dqlock(dqp);
- dqp->dq_flags &= ~(XFS_DQ_WANT);
- }
- flist_locked = B_TRUE;
- }
-
- /*
- * id couldn't have changed; we had the hashlock all
- * along
- */
- ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
- if (flist_locked) {
- if (dqp->q_nrefs != 0) {
- xfs_qm_freelist_unlock(xfs_Gqm);
- flist_locked = B_FALSE;
- } else {
- /*
- * take it off the freelist
- */
- xfs_dqtrace_entry(dqp,
- "DQLOOKUP: TAKEOFF FL");
- XQM_FREELIST_REMOVE(dqp);
- /* xfs_qm_freelist_print(&(xfs_Gqm->
- qm_dqfreelist),
- "after removal"); */
- }
- }
-
- /*
- * grab a reference
- */
- XFS_DQHOLD(dqp);
-
- if (flist_locked)
- xfs_qm_freelist_unlock(xfs_Gqm);
- /*
- * move the dquot to the front of the hashchain
- */
- ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
- if (dqp->HL_PREVP != &qh->qh_next) {
- xfs_dqtrace_entry(dqp,
- "DQLOOKUP: HASH MOVETOFRONT");
- if ((d = dqp->HL_NEXT))
- d->HL_PREVP = dqp->HL_PREVP;
- *(dqp->HL_PREVP) = d;
- d = qh->qh_next;
- d->HL_PREVP = &dqp->HL_NEXT;
- dqp->HL_NEXT = d;
- dqp->HL_PREVP = &qh->qh_next;
- qh->qh_next = dqp;
- }
- xfs_dqtrace_entry(dqp, "LOOKUP END");
- *O_dqpp = dqp;
- ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
- return (0);
- }
- }
-
- *O_dqpp = NULL;
- ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
- return (1);
-}
-
-/*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
- */
-int
-xfs_qm_dqget(
- xfs_mount_t *mp,
- xfs_inode_t *ip, /* locked inode (optional) */
- xfs_dqid_t id, /* uid/projid/gid depending on type */
- uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
- uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
- xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
-{
- xfs_dquot_t *dqp;
- xfs_dqhash_t *h;
- uint version;
- int error;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
- if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
- (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
- (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
- return (ESRCH);
- }
- h = XFS_DQ_HASH(mp, id, type);
-
-#ifdef DEBUG
- if (xfs_do_dqerror) {
- if ((xfs_dqerror_target == mp->m_ddev_targp) &&
- (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
- cmn_err(CE_DEBUG, "Returning error in dqget");
- return (EIO);
- }
- }
-#endif
-
- again:
-
-#ifdef DEBUG
- ASSERT(type == XFS_DQ_USER ||
- type == XFS_DQ_PROJ ||
- type == XFS_DQ_GROUP);
- if (ip) {
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
- if (type == XFS_DQ_USER)
- ASSERT(ip->i_udquot == NULL);
- else
- ASSERT(ip->i_gdquot == NULL);
- }
-#endif
- XFS_DQ_HASH_LOCK(h);
-
- /*
- * Look in the cache (hashtable).
- * The chain is kept locked during lookup.
- */
- if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
- XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
- /*
- * The dquot was found, moved to the front of the chain,
- * taken off the freelist if it was on it, and locked
- * at this point. Just unlock the hashchain and return.
- */
- ASSERT(*O_dqpp);
- ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
- XFS_DQ_HASH_UNLOCK(h);
- xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
- return (0); /* success */
- }
- XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-
- /*
- * Dquot cache miss. We don't want to keep the inode lock across
- * a (potential) disk read. Also we don't want to deal with the lock
- * ordering between quotainode and this inode. OTOH, dropping the inode
- * lock here means dealing with a chown that can happen before
- * we re-acquire the lock.
- */
- if (ip)
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * Save the hashchain version stamp, and unlock the chain, so that
- * we don't keep the lock across a disk read
- */
- version = h->qh_version;
- XFS_DQ_HASH_UNLOCK(h);
-
- /*
- * Allocate the dquot on the kernel heap, and read the ondisk
- * portion off the disk. Also, do all the necessary initialization
- * This can return ENOENT if dquot didn't exist on disk and we didn't
- * ask it to allocate; ESRCH if quotas got turned off suddenly.
- */
- if ((error = xfs_qm_idtodq(mp, id, type,
- flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
- XFS_QMOPT_DOWARN),
- &dqp))) {
- if (ip)
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- return (error);
- }
-
- /*
- * See if this is mount code calling to look at the overall quota limits
- * which are stored in the id == 0 user or group's dquot.
- * Since we may not have done a quotacheck by this point, just return
- * the dquot without attaching it to any hashtables, lists, etc, or even
- * taking a reference.
- * The caller must dqdestroy this once done.
- */
- if (flags & XFS_QMOPT_DQSUSER) {
- ASSERT(id == 0);
- ASSERT(! ip);
- goto dqret;
- }
-
- /*
- * Dquot lock comes after hashlock in the lock ordering
- */
- if (ip) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (! XFS_IS_DQTYPE_ON(mp, type)) {
- /* inode stays locked on return */
- xfs_qm_dqdestroy(dqp);
- return XFS_ERROR(ESRCH);
- }
- /*
- * A dquot could be attached to this inode by now, since
- * we had dropped the ilock.
- */
- if (type == XFS_DQ_USER) {
- if (ip->i_udquot) {
- xfs_qm_dqdestroy(dqp);
- dqp = ip->i_udquot;
- xfs_dqlock(dqp);
- goto dqret;
- }
- } else {
- if (ip->i_gdquot) {
- xfs_qm_dqdestroy(dqp);
- dqp = ip->i_gdquot;
- xfs_dqlock(dqp);
- goto dqret;
- }
- }
- }
-
- /*
- * Hashlock comes after ilock in lock order
- */
- XFS_DQ_HASH_LOCK(h);
- if (version != h->qh_version) {
- xfs_dquot_t *tmpdqp;
- /*
- * Now, see if somebody else put the dquot in the
- * hashtable before us. This can happen because we didn't
- * keep the hashchain lock. We don't have to worry about
- * lock order between the two dquots here since dqp isn't
- * on any findable lists yet.
- */
- if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
- /*
- * Duplicate found. Just throw away the new dquot
- * and start over.
- */
- xfs_qm_dqput(tmpdqp);
- XFS_DQ_HASH_UNLOCK(h);
- xfs_qm_dqdestroy(dqp);
- XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
- goto again;
- }
- }
-
- /*
- * Put the dquot at the beginning of the hash-chain and mp's list
- * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
- */
- ASSERT(XFS_DQ_IS_HASH_LOCKED(h));
- dqp->q_hash = h;
- XQM_HASHLIST_INSERT(h, dqp);
-
- /*
- * Attach this dquot to this filesystem's list of all dquots,
- * kept inside the mount structure in m_quotainfo field
- */
- xfs_qm_mplist_lock(mp);
-
- /*
- * We return a locked dquot to the caller, with a reference taken
- */
- xfs_dqlock(dqp);
- dqp->q_nrefs = 1;
-
- XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
-
- xfs_qm_mplist_unlock(mp);
- XFS_DQ_HASH_UNLOCK(h);
- dqret:
- ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip));
- xfs_dqtrace_entry(dqp, "DQGET DONE");
- *O_dqpp = dqp;
- return (0);
-}
-
-
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
- xfs_dquot_t *dqp)
-{
- xfs_dquot_t *gdqp;
-
- ASSERT(dqp->q_nrefs > 0);
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- xfs_dqtrace_entry(dqp, "DQPUT");
-
- if (dqp->q_nrefs != 1) {
- dqp->q_nrefs--;
- xfs_dqunlock(dqp);
- return;
- }
-
- /*
- * drop the dqlock and acquire the freelist and dqlock
- * in the right order; but try to get it out-of-order first
- */
- if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
- xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT");
- xfs_dqunlock(dqp);
- xfs_qm_freelist_lock(xfs_Gqm);
- xfs_dqlock(dqp);
- }
-
- while (1) {
- gdqp = NULL;
-
- /* We can't depend on nrefs being == 1 here */
- if (--dqp->q_nrefs == 0) {
- xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST");
- /*
- * insert at end of the freelist.
- */
- XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
-
- /*
- * If we just added a udquot to the freelist, then
- * we want to release the gdquot reference that
- * it (probably) has. Otherwise it'll keep the
- * gdquot from getting reclaimed.
- */
- if ((gdqp = dqp->q_gdquot)) {
- /*
- * Avoid a recursive dqput call
- */
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
-
- /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
- "@@@@@++ Free list (after append) @@@@@+");
- */
- }
- xfs_dqunlock(dqp);
-
- /*
- * If we had a group quota inside the user quota as a hint,
- * release it now.
- */
- if (! gdqp)
- break;
- dqp = gdqp;
- }
- xfs_qm_freelist_unlock(xfs_Gqm);
-}
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
- */
-void
-xfs_qm_dqrele(
- xfs_dquot_t *dqp)
-{
- ASSERT(dqp);
- xfs_dqtrace_entry(dqp, "DQRELE");
-
- xfs_dqlock(dqp);
- /*
- * We don't care to flush it if the dquot is dirty here.
- * That will create stutters that we want to avoid.
- * Instead we do a delayed write when we try to reclaim
- * a dirty dquot. Also xfs_sync will take part of the burden...
- */
- xfs_qm_dqput(dqp);
-}
-
-
-/*
- * Write a modified dquot to disk.
- * The dquot must be locked and the flush lock too taken by caller.
- * The flush lock will not be unlocked until the dquot reaches the disk,
- * but the dquot is free to be unlocked and modified by the caller
- * in the interim. Dquot is still locked on return. This behavior is
- * identical to that of inodes.
- */
-int
-xfs_qm_dqflush(
- xfs_dquot_t *dqp,
- uint flags)
-{
- xfs_mount_t *mp;
- xfs_buf_t *bp;
- xfs_disk_dquot_t *ddqp;
- int error;
- SPLDECL(s);
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
- xfs_dqtrace_entry(dqp, "DQFLUSH");
-
- /*
- * If not dirty, nada.
- */
- if (!XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqfunlock(dqp);
- return (0);
- }
-
- /*
- * Cant flush a pinned dquot. Wait for it.
- */
- xfs_qm_dqunpin_wait(dqp);
-
- /*
- * This may have been unpinned because the filesystem is shutting
- * down forcibly. If that's the case we must not write this dquot
- * to disk, because the log record didn't make it to disk!
- */
- if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
- dqp->dq_flags &= ~(XFS_DQ_DIRTY);
- xfs_dqfunlock(dqp);
- return XFS_ERROR(EIO);
- }
-
- /*
- * Get the buffer containing the on-disk dquot
- * We don't need a transaction envelope because we know that the
- * the ondisk-dquot has already been allocated for.
- */
- if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
- xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
- ASSERT(error != ENOENT);
- /*
- * Quotas could have gotten turned off (ESRCH)
- */
- xfs_dqfunlock(dqp);
- return (error);
- }
-
- if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
- 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
- xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
- return XFS_ERROR(EIO);
- }
-
- /* This is the only portion of data that needs to persist */
- memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
-
- /*
- * Clear the dirty field and remember the flush lsn for later use.
- */
- dqp->dq_flags &= ~(XFS_DQ_DIRTY);
- mp = dqp->q_mount;
-
- /* lsn is 64 bits */
- AIL_LOCK(mp, s);
- dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
- AIL_UNLOCK(mp, s);
-
- /*
- * Attach an iodone routine so that we can remove this dquot from the
- * AIL and release the flush lock once the dquot is synced to disk.
- */
- xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
- xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));
- /*
- * If the buffer is pinned then push on the log so we won't
- * get stuck waiting in the write for too long.
- */
- if (XFS_BUF_ISPINNED(bp)) {
- xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE");
- xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
- }
-
- if (flags & XFS_QMOPT_DELWRI) {
- xfs_bdwrite(mp, bp);
- } else if (flags & XFS_QMOPT_ASYNC) {
- xfs_bawrite(mp, bp);
- } else {
- error = xfs_bwrite(mp, bp);
- }
- xfs_dqtrace_entry(dqp, "DQFLUSH END");
- /*
- * dqp is still locked, but caller is free to unlock it now.
- */
- return (error);
-
-}
-
-/*
- * This is the dquot flushing I/O completion routine. It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk. It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes..
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_dqflush_done(
- xfs_buf_t *bp,
- xfs_dq_logitem_t *qip)
-{
- xfs_dquot_t *dqp;
- SPLDECL(s);
-
- dqp = qip->qli_dquot;
-
- /*
- * We only want to pull the item from the AIL if its
- * location in the log has not changed since we started the flush.
- * Thus, we only bother if the dquot's lsn has
- * not changed. First we check the lsn outside the lock
- * since it's cheaper, and then we recheck while
- * holding the lock before removing the dquot from the AIL.
- */
- if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
- qip->qli_item.li_lsn == qip->qli_flush_lsn) {
-
- AIL_LOCK(dqp->q_mount, s);
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
- xfs_trans_delete_ail(dqp->q_mount,
- (xfs_log_item_t*)qip, s);
- else
- AIL_UNLOCK(dqp->q_mount, s);
- }
-
- /*
- * Release the dq's flush lock since we're done with it.
- */
- xfs_dqfunlock(dqp);
-}
-
-
-int
-xfs_qm_dqflock_nowait(
- xfs_dquot_t *dqp)
-{
- int locked;
-
- locked = cpsema(&((dqp)->q_flock));
-
- /* XXX ifdef these out */
- if (locked)
- (dqp)->dq_flags |= XFS_DQ_FLOCKED;
- return (locked);
-}
-
-
-int
-xfs_qm_dqlock_nowait(
- xfs_dquot_t *dqp)
-{
- return (mutex_trylock(&((dqp)->q_qlock)));
-}
-
-void
-xfs_dqlock(
- xfs_dquot_t *dqp)
-{
- mutex_lock(&(dqp->q_qlock));
-}
-
-void
-xfs_dqunlock(
- xfs_dquot_t *dqp)
-{
- mutex_unlock(&(dqp->q_qlock));
- if (dqp->q_logitem.qli_dquot == dqp) {
- /* Once was dqp->q_mount, but might just have been cleared */
- xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
- (xfs_log_item_t*)&(dqp->q_logitem));
- }
-}
-
-
-void
-xfs_dqunlock_nonotify(
- xfs_dquot_t *dqp)
-{
- mutex_unlock(&(dqp->q_qlock));
-}
-
-void
-xfs_dqlock2(
- xfs_dquot_t *d1,
- xfs_dquot_t *d2)
-{
- if (d1 && d2) {
- ASSERT(d1 != d2);
- if (be32_to_cpu(d1->q_core.d_id) >
- be32_to_cpu(d2->q_core.d_id)) {
- xfs_dqlock(d2);
- xfs_dqlock(d1);
- } else {
- xfs_dqlock(d1);
- xfs_dqlock(d2);
- }
- } else {
- if (d1) {
- xfs_dqlock(d1);
- } else if (d2) {
- xfs_dqlock(d2);
- }
- }
-}
-
-
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqpurge(
- xfs_dquot_t *dqp,
- uint flags)
-{
- xfs_dqhash_t *thishash;
- xfs_mount_t *mp;
-
- mp = dqp->q_mount;
-
- ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
- ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
-
- xfs_dqlock(dqp);
- /*
- * We really can't afford to purge a dquot that is
- * referenced, because these are hard refs.
- * It shouldn't happen in general because we went thru _all_ inodes in
- * dqrele_all_inodes before calling this and didn't let the mountlock go.
- * However it is possible that we have dquots with temporary
- * references that are not attached to an inode. e.g. see xfs_setattr().
- */
- if (dqp->q_nrefs != 0) {
- xfs_dqunlock(dqp);
- XFS_DQ_HASH_UNLOCK(dqp->q_hash);
- return (1);
- }
-
- ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
-
- /*
- * If we're turning off quotas, we have to make sure that, for
- * example, we don't delete quota disk blocks while dquots are
- * in the process of getting written to those disk blocks.
- * This dquot might well be on AIL, and we can't leave it there
- * if we're turning off quotas. Basically, we need this flush
- * lock, and are willing to block on it.
- */
- if (! xfs_qm_dqflock_nowait(dqp)) {
- /*
- * Block on the flush lock after nudging dquot buffer,
- * if it is incore.
- */
- xfs_qm_dqflock_pushbuf_wait(dqp);
- }
-
- /*
- * XXXIf we're turning this type of quotas off, we don't care
- * about the dirty metadata sitting in this dquot. OTOH, if
- * we're unmounting, we do care, so we flush it and wait.
- */
- if (XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
- /* dqflush unlocks dqflock */
- /*
- * Given that dqpurge is a very rare occurrence, it is OK
- * that we're holding the hashlist and mplist locks
- * across the disk write. But, ... XXXsup
- *
- * We don't care about getting disk errors here. We need
- * to purge this dquot anyway, so we go ahead regardless.
- */
- (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
- xfs_dqflock(dqp);
- }
- ASSERT(dqp->q_pincount == 0);
- ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
- !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
- thishash = dqp->q_hash;
- XQM_HASHLIST_REMOVE(thishash, dqp);
- XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp);
- /*
- * XXX Move this to the front of the freelist, if we can get the
- * freelist lock.
- */
- ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
-
- dqp->q_mount = NULL;
- dqp->q_hash = NULL;
- dqp->dq_flags = XFS_DQ_INACTIVE;
- memset(&dqp->q_core, 0, sizeof(dqp->q_core));
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- XFS_DQ_HASH_UNLOCK(thishash);
- return (0);
-}
-
-
-#ifdef QUOTADEBUG
-void
-xfs_qm_dqprint(xfs_dquot_t *dqp)
-{
- cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
- cmn_err(CE_DEBUG, "---- dquotID = %d",
- (int)be32_to_cpu(dqp->q_core.d_id));
- cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
- cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount);
- cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno);
- cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
- cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_blk_hardlimit),
- (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
- cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_blk_softlimit),
- (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
- cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_ino_hardlimit),
- (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
- cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_ino_softlimit),
- (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
- cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_bcount),
- (int)be64_to_cpu(dqp->q_core.d_bcount));
- cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_icount),
- (int)be64_to_cpu(dqp->q_core.d_icount));
- cmn_err(CE_DEBUG, "---- btimer = %d",
- (int)be32_to_cpu(dqp->q_core.d_btimer));
- cmn_err(CE_DEBUG, "---- itimer = %d",
- (int)be32_to_cpu(dqp->q_core.d_itimer));
- cmn_err(CE_DEBUG, "---------------------------");
-}
-#endif
-
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_qm_dqflock_pushbuf_wait(
- xfs_dquot_t *dqp)
-{
- xfs_buf_t *bp;
-
- /*
- * Check to see if the dquot has been flushed delayed
- * write. If so, grab its buffer and send it
- * out immediately. We'll be able to acquire
- * the flush lock when the I/O completes.
- */
- bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
- XFS_QI_DQCHUNKLEN(dqp->q_mount),
- XFS_INCORE_TRYLOCK);
- if (bp != NULL) {
- if (XFS_BUF_ISDELAYWRITE(bp)) {
- if (XFS_BUF_ISPINNED(bp)) {
- xfs_log_force(dqp->q_mount,
- (xfs_lsn_t)0,
- XFS_LOG_FORCE);
- }
- xfs_bawrite(dqp->q_mount, bp);
- } else {
- xfs_buf_relse(bp);
- }
- }
- xfs_dqflock(dqp);
-}
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
deleted file mode 100644
index c0c629663a5..00000000000
--- a/fs/xfs/quota/xfs_dquot.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DQUOT_H__
-#define __XFS_DQUOT_H__
-
-/*
- * Dquots are structures that hold quota information about a user or a group,
- * much like inodes are for files. In fact, dquots share many characteristics
- * with inodes. However, dquots can also be a centralized resource, relative
- * to a collection of inodes. In this respect, dquots share some characteristics
- * of the superblock.
- * XFS dquots exploit both those in its algorithms. They make every attempt
- * to not be a bottleneck when quotas are on and have minimal impact, if any,
- * when quotas are off.
- */
-
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
- struct xfs_dquot *qh_next;
- mutex_t qh_lock;
- uint qh_version; /* ever increasing version */
- uint qh_nelems; /* number of dquots on the list */
-} xfs_dqhash_t;
-
-typedef struct xfs_dqlink {
- struct xfs_dquot *ql_next; /* forward link */
- struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */
-} xfs_dqlink_t;
-
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * This is the marker which is designed to occupy the first few
- * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers
- * must come first.
- * This serves as the marker ("sentinel") when we have to restart list
- * iterations because of locking considerations.
- */
-typedef struct xfs_dqmarker {
- struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */
- struct xfs_dquot*dqm_flprev;
- xfs_dqlink_t dqm_mplist; /* link to mount's list of dquots */
- xfs_dqlink_t dqm_hashlist; /* link to the hash chain */
- uint dqm_flags; /* various flags (XFS_DQ_*) */
-} xfs_dqmarker_t;
-
-/*
- * The incore dquot structure
- */
-typedef struct xfs_dquot {
- xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */
- xfs_dqhash_t *q_hash; /* the hashchain header */
- struct xfs_mount*q_mount; /* filesystem this relates to */
- struct xfs_trans*q_transp; /* trans this belongs to currently */
- uint q_nrefs; /* # active refs from inodes */
- xfs_daddr_t q_blkno; /* blkno of dquot buffer */
- int q_bufoffset; /* off of dq in buffer (# dquots) */
- xfs_fileoff_t q_fileoffset; /* offset in quotas file */
-
- struct xfs_dquot*q_gdquot; /* group dquot, hint only */
- xfs_disk_dquot_t q_core; /* actual usage & quotas */
- xfs_dq_logitem_t q_logitem; /* dquot log item */
- xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
- xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
- xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
- mutex_t q_qlock; /* quota lock */
- sema_t q_flock; /* flush lock */
- uint q_pincount; /* pin count for this dquot */
- sv_t q_pinwait; /* sync var for pinning */
-#ifdef XFS_DQUOT_TRACE
- struct ktrace *q_trace; /* trace header structure */
-#endif
-} xfs_dquot_t;
-
-
-#define dq_flnext q_lists.dqm_flnext
-#define dq_flprev q_lists.dqm_flprev
-#define dq_mplist q_lists.dqm_mplist
-#define dq_hashlist q_lists.dqm_hashlist
-#define dq_flags q_lists.dqm_flags
-
-#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
-
-#ifdef DEBUG
-static inline int
-XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
-{
- if (mutex_trylock(&dqp->q_qlock)) {
- mutex_unlock(&dqp->q_qlock);
- return 0;
- }
- return 1;
-}
-#endif
-
-
-/*
- * The following three routines simply manage the q_flock
- * semaphore embedded in the dquot. This semaphore synchronizes
- * processes attempting to flush the in-core dquot back to disk.
- */
-#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\
- (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
-#define xfs_dqfunlock(dqp) { ASSERT(valusema(&((dqp)->q_flock)) <= 0); \
- vsema(&((dqp)->q_flock)); \
- (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
-
-#define XFS_DQ_PINLOCK(dqp) mutex_spinlock( \
- &(XFS_DQ_TO_QINF(dqp)->qi_pinlock))
-#define XFS_DQ_PINUNLOCK(dqp, s) mutex_spinunlock( \
- &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s)
-
-#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (valusema(&((dqp)->q_flock)) <= 0)
-#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
-#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
-#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
-#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ)
-#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP)
-#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo)
-#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \
- XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
- XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
-
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
- (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
- (XFS_IS_OQUOTA_ON((d)->q_mount))))
-
-#ifdef XFS_DQUOT_TRACE
-/*
- * Dquot Tracing stuff.
- */
-#define DQUOT_TRACE_SIZE 64
-#define DQUOT_KTRACE_ENTRY 1
-
-extern void __xfs_dqtrace_entry(xfs_dquot_t *dqp, char *func,
- void *, xfs_inode_t *);
-#define xfs_dqtrace_entry_ino(a,b,ip) \
- __xfs_dqtrace_entry((a), (b), (void*)__return_address, (ip))
-#define xfs_dqtrace_entry(a,b) \
- __xfs_dqtrace_entry((a), (b), (void*)__return_address, NULL)
-#else
-#define xfs_dqtrace_entry(a,b)
-#define xfs_dqtrace_entry_ino(a,b,ip)
-#endif
-
-#ifdef QUOTADEBUG
-extern void xfs_qm_dqprint(xfs_dquot_t *);
-#else
-#define xfs_qm_dqprint(a)
-#endif
-
-extern void xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int xfs_qm_dqpurge(xfs_dquot_t *, uint);
-extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
-extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
-extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
- xfs_disk_dquot_t *);
-extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
- xfs_disk_dquot_t *);
-extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
- xfs_dqid_t, uint, uint, xfs_dquot_t **);
-extern void xfs_qm_dqput(xfs_dquot_t *);
-extern void xfs_qm_dqrele(xfs_dquot_t *);
-extern void xfs_dqlock(xfs_dquot_t *);
-extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void xfs_dqunlock(xfs_dquot_t *);
-extern void xfs_dqunlock_nonotify(xfs_dquot_t *);
-
-#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
deleted file mode 100644
index 2ec6b441849..00000000000
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ /dev/null
@@ -1,699 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-/*
- * returns the number of iovecs needed to log the given dquot item.
- */
-/* ARGSUSED */
-STATIC uint
-xfs_qm_dquot_logitem_size(
- xfs_dq_logitem_t *logitem)
-{
- /*
- * we need only two iovecs, one for the format, one for the real thing
- */
- return (2);
-}
-
-/*
- * fills in the vector of log iovecs for the given dquot log item.
- */
-STATIC void
-xfs_qm_dquot_logitem_format(
- xfs_dq_logitem_t *logitem,
- xfs_log_iovec_t *logvec)
-{
- ASSERT(logitem);
- ASSERT(logitem->qli_dquot);
-
- logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
- logvec->i_len = sizeof(xfs_dq_logformat_t);
- logvec++;
- logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
- logvec->i_len = sizeof(xfs_disk_dquot_t);
-
- ASSERT(2 == logitem->qli_item.li_desc->lid_size);
- logitem->qli_format.qlf_size = 2;
-
-}
-
-/*
- * Increment the pin count of the given dquot.
- * This value is protected by pinlock spinlock in the xQM structure.
- */
-STATIC void
-xfs_qm_dquot_logitem_pin(
- xfs_dq_logitem_t *logitem)
-{
- unsigned long s;
- xfs_dquot_t *dqp;
-
- dqp = logitem->qli_dquot;
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- s = XFS_DQ_PINLOCK(dqp);
- dqp->q_pincount++;
- XFS_DQ_PINUNLOCK(dqp, s);
-}
-
-/*
- * Decrement the pin count of the given dquot, and wake up
- * anyone in xfs_dqwait_unpin() if the count goes to 0. The
- * dquot must have been previously pinned with a call to xfs_dqpin().
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_dquot_logitem_unpin(
- xfs_dq_logitem_t *logitem,
- int stale)
-{
- unsigned long s;
- xfs_dquot_t *dqp;
-
- dqp = logitem->qli_dquot;
- ASSERT(dqp->q_pincount > 0);
- s = XFS_DQ_PINLOCK(dqp);
- dqp->q_pincount--;
- if (dqp->q_pincount == 0) {
- sv_broadcast(&dqp->q_pinwait);
- }
- XFS_DQ_PINUNLOCK(dqp, s);
-}
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_dquot_logitem_unpin_remove(
- xfs_dq_logitem_t *logitem,
- xfs_trans_t *tp)
-{
- xfs_qm_dquot_logitem_unpin(logitem, 0);
-}
-
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
- xfs_dq_logitem_t *logitem)
-{
- xfs_dquot_t *dqp;
-
- dqp = logitem->qli_dquot;
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
-
- /*
- * Since we were able to lock the dquot's flush lock and
- * we found it on the AIL, the dquot must be dirty. This
- * is because the dquot is removed from the AIL while still
- * holding the flush lock in xfs_dqflush_done(). Thus, if
- * we found it in the AIL and were able to obtain the flush
- * lock without sleeping, then there must not have been
- * anyone in the process of flushing the dquot.
- */
- xfs_qm_dqflush(dqp, XFS_B_DELWRI);
- xfs_dqunlock(dqp);
-}
-
-/*ARGSUSED*/
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
- xfs_dq_logitem_t *l,
- xfs_lsn_t lsn)
-{
- /*
- * We always re-log the entire dquot when it becomes dirty,
- * so, the latest copy _is_ the only one that matters.
- */
- return (lsn);
-}
-
-
-/*
- * This is called to wait for the given dquot to be unpinned.
- * Most of these pin/unpin routines are plagiarized from inode code.
- */
-void
-xfs_qm_dqunpin_wait(
- xfs_dquot_t *dqp)
-{
- SPLDECL(s);
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (dqp->q_pincount == 0) {
- return;
- }
-
- /*
- * Give the log a push so we don't wait here too long.
- */
- xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
- s = XFS_DQ_PINLOCK(dqp);
- if (dqp->q_pincount == 0) {
- XFS_DQ_PINUNLOCK(dqp, s);
- return;
- }
- sv_wait(&(dqp->q_pinwait), PINOD,
- &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
-}
-
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL_LOCK at this point. Calling incore() to
- * search the buffercache can be a time consuming thing, and AIL_LOCK is a
- * spinlock.
- */
-STATIC void
-xfs_qm_dquot_logitem_pushbuf(
- xfs_dq_logitem_t *qip)
-{
- xfs_dquot_t *dqp;
- xfs_mount_t *mp;
- xfs_buf_t *bp;
- uint dopush;
-
- dqp = qip->qli_dquot;
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
- /*
- * The qli_pushbuf_flag keeps others from
- * trying to duplicate our effort.
- */
- ASSERT(qip->qli_pushbuf_flag != 0);
- ASSERT(qip->qli_push_owner == current_pid());
-
- /*
- * If flushlock isn't locked anymore, chances are that the
- * inode flush completed and the inode was taken off the AIL.
- * So, just get out.
- */
- if ((valusema(&(dqp->q_flock)) > 0) ||
- ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
- qip->qli_pushbuf_flag = 0;
- xfs_dqunlock(dqp);
- return;
- }
- mp = dqp->q_mount;
- bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
- XFS_QI_DQCHUNKLEN(mp),
- XFS_INCORE_TRYLOCK);
- if (bp != NULL) {
- if (XFS_BUF_ISDELAYWRITE(bp)) {
- dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
- (valusema(&(dqp->q_flock)) <= 0));
- qip->qli_pushbuf_flag = 0;
- xfs_dqunlock(dqp);
-
- if (XFS_BUF_ISPINNED(bp)) {
- xfs_log_force(mp, (xfs_lsn_t)0,
- XFS_LOG_FORCE);
- }
- if (dopush) {
-#ifdef XFSRACEDEBUG
- delay_for_intr();
- delay(300);
-#endif
- xfs_bawrite(mp, bp);
- } else {
- xfs_buf_relse(bp);
- }
- } else {
- qip->qli_pushbuf_flag = 0;
- xfs_dqunlock(dqp);
- xfs_buf_relse(bp);
- }
- return;
- }
-
- qip->qli_pushbuf_flag = 0;
- xfs_dqunlock(dqp);
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item. Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping. If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
-STATIC uint
-xfs_qm_dquot_logitem_trylock(
- xfs_dq_logitem_t *qip)
-{
- xfs_dquot_t *dqp;
- uint retval;
-
- dqp = qip->qli_dquot;
- if (dqp->q_pincount > 0)
- return (XFS_ITEM_PINNED);
-
- if (! xfs_qm_dqlock_nowait(dqp))
- return (XFS_ITEM_LOCKED);
-
- retval = XFS_ITEM_SUCCESS;
- if (! xfs_qm_dqflock_nowait(dqp)) {
- /*
- * The dquot is already being flushed. It may have been
- * flushed delayed write, however, and we don't want to
- * get stuck waiting for that to complete. So, we want to check
- * to see if we can lock the dquot's buffer without sleeping.
- * If we can and it is marked for delayed write, then we
- * hold it and send it out from the push routine. We don't
- * want to do that now since we might sleep in the device
- * strategy routine. We also don't want to grab the buffer lock
- * here because we'd like not to call into the buffer cache
- * while holding the AIL_LOCK.
- * Make sure to only return PUSHBUF if we set pushbuf_flag
- * ourselves. If someone else is doing it then we don't
- * want to go to the push routine and duplicate their efforts.
- */
- if (qip->qli_pushbuf_flag == 0) {
- qip->qli_pushbuf_flag = 1;
- ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
-#ifdef DEBUG
- qip->qli_push_owner = current_pid();
-#endif
- /*
- * The dquot is left locked.
- */
- retval = XFS_ITEM_PUSHBUF;
- } else {
- retval = XFS_ITEM_FLUSHING;
- xfs_dqunlock_nonotify(dqp);
- }
- }
-
- ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
- return (retval);
-}
-
-
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction. If the
- * hold flags is set, do not unlock the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_unlock(
- xfs_dq_logitem_t *ql)
-{
- xfs_dquot_t *dqp;
-
- ASSERT(ql != NULL);
- dqp = ql->qli_dquot;
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
- /*
- * Clear the transaction pointer in the dquot
- */
- dqp->q_transp = NULL;
-
- /*
- * dquots are never 'held' from getting unlocked at the end of
- * a transaction. Their locking and unlocking is hidden inside the
- * transaction layer, within trans_commit. Hence, no LI_HOLD flag
- * for the logitem.
- */
- xfs_dqunlock(dqp);
-}
-
-
-/*
- * The transaction with the dquot locked has aborted. The dquot
- * must not be dirty within the transaction. We simply unlock just
- * as if the transaction had been cancelled.
- */
-STATIC void
-xfs_qm_dquot_logitem_abort(
- xfs_dq_logitem_t *ql)
-{
- xfs_qm_dquot_logitem_unlock(ql);
-}
-
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_dquot_logitem_committing(
- xfs_dq_logitem_t *l,
- xfs_lsn_t lsn)
-{
- return;
-}
-
-
-/*
- * This is the ops vector for dquots
- */
-STATIC struct xfs_item_ops xfs_dquot_item_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_qm_dquot_logitem_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*, int))
- xfs_qm_dquot_logitem_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
- xfs_qm_dquot_logitem_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))
- xfs_qm_dquot_logitem_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_dquot_logitem_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_abort,
- .iop_pushbuf = (void(*)(xfs_log_item_t*))
- xfs_qm_dquot_logitem_pushbuf,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_dquot_logitem_committing
-};
-
-/*
- * Initialize the dquot log item for a newly allocated dquot.
- * The dquot isn't locked at this point, but it isn't on any of the lists
- * either, so we don't care.
- */
-void
-xfs_qm_dquot_logitem_init(
- struct xfs_dquot *dqp)
-{
- xfs_dq_logitem_t *lp;
- lp = &dqp->q_logitem;
-
- lp->qli_item.li_type = XFS_LI_DQUOT;
- lp->qli_item.li_ops = &xfs_dquot_item_ops;
- lp->qli_item.li_mountp = dqp->q_mount;
- lp->qli_dquot = dqp;
- lp->qli_format.qlf_type = XFS_LI_DQUOT;
- lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
- lp->qli_format.qlf_blkno = dqp->q_blkno;
- lp->qli_format.qlf_len = 1;
- /*
- * This is just the offset of this dquot within its buffer
- * (which is currently 1 FSB and probably won't change).
- * Hence 32 bits for this offset should be just fine.
- * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
- * here, and recompute it at recovery time.
- */
- lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
-}
-
-/*------------------ QUOTAOFF LOG ITEMS -------------------*/
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for an quotaoff item. It just logs the
- * quotaoff_log_format structure.
- */
-/*ARGSUSED*/
-STATIC uint
-xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
-{
- return (1);
-}
-
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * It is at this point that we assert that all of the extent
- * slots in the quotaoff item have been filled.
- */
-STATIC void
-xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
- xfs_log_iovec_t *log_vector)
-{
- ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
- log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
- log_vector->i_len = sizeof(xfs_qoff_logitem_t);
- XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF);
- qf->qql_format.qf_size = 1;
-}
-
-
-/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
-{
- return;
-}
-
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale)
-{
- return;
-}
-
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
-{
- return;
-}
-
-/*
- * Quotaoff items have no locking, so just return success.
- */
-/*ARGSUSED*/
-STATIC uint
-xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
-{
- return XFS_ITEM_LOCKED;
-}
-
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf)
-{
- return;
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-/*ARGSUSED*/
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn)
-{
- return (lsn);
-}
-
-/*
- * The transaction of which this QUOTAOFF is a part has been aborted.
- * Just clean up after ourselves.
- * Shouldn't this never happen in the case of qoffend logitems? XXX
- */
-STATIC void
-xfs_qm_qoff_logitem_abort(xfs_qoff_logitem_t *qf)
-{
- kmem_free(qf, sizeof(xfs_qoff_logitem_t));
-}
-
-/*
- * There isn't much you can do to push on an quotaoff item. It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf)
-{
- return;
-}
-
-
-/*ARGSUSED*/
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
- xfs_qoff_logitem_t *qfe,
- xfs_lsn_t lsn)
-{
- xfs_qoff_logitem_t *qfs;
- SPLDECL(s);
-
- qfs = qfe->qql_start_lip;
- AIL_LOCK(qfs->qql_item.li_mountp,s);
- /*
- * Delete the qoff-start logitem from the AIL.
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs, s);
- kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
- kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
- return (xfs_lsn_t)-1;
-}
-
-/*
- * XXX rcc - don't know quite what to do with this. I think we can
- * just ignore it. The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen? Also, do we need different routines for
- * quotaoff start and quotaoff end? I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable). If we do something else,
- * then maybe we don't need two.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
-{
- return;
-}
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
-{
- return;
-}
-
-STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_qm_qoff_logitem_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
- .iop_unpin = (void(*)(xfs_log_item_t* ,int))
- xfs_qm_qoff_logitem_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
- xfs_qm_qoff_logitem_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoffend_logitem_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
- .iop_pushbuf = NULL,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoffend_logitem_committing
-};
-
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
-STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_qm_qoff_logitem_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*, int))
- xfs_qm_qoff_logitem_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
- xfs_qm_qoff_logitem_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoff_logitem_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
- .iop_pushbuf = NULL,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoff_logitem_committing
-};
-
-/*
- * Allocate and initialize an quotaoff item of the correct quota type(s).
- */
-xfs_qoff_logitem_t *
-xfs_qm_qoff_logitem_init(
- struct xfs_mount *mp,
- xfs_qoff_logitem_t *start,
- uint flags)
-{
- xfs_qoff_logitem_t *qf;
-
- qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP);
-
- qf->qql_item.li_type = XFS_LI_QUOTAOFF;
- if (start)
- qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops;
- else
- qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops;
- qf->qql_item.li_mountp = mp;
- qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
- qf->qql_format.qf_flags = flags;
- qf->qql_start_lip = start;
- return (qf);
-}
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
deleted file mode 100644
index 53a00fb217f..00000000000
--- a/fs/xfs/quota/xfs_qm.c
+++ /dev/null
@@ -1,2859 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_clnt.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_bmap.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-mutex_t xfs_Gqm_lock;
-struct xfs_qm *xfs_Gqm;
-uint ndquot;
-
-kmem_zone_t *qm_dqzone;
-kmem_zone_t *qm_dqtrxzone;
-STATIC kmem_shaker_t xfs_qm_shaker;
-
-STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
-
-STATIC void xfs_qm_freelist_init(xfs_frlist_t *);
-STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *);
-STATIC int xfs_qm_mplist_nowait(xfs_mount_t *);
-STATIC int xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
-
-STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int xfs_qm_shake(int, gfp_t);
-
-#ifdef DEBUG
-extern mutex_t qcheck_lock;
-#endif
-
-#ifdef QUOTADEBUG
-#define XQM_LIST_PRINT(l, NXT, title) \
-{ \
- xfs_dquot_t *dqp; int i = 0; \
- cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
- for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
- cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " \
- "bcnt = %d, icnt = %d, refs = %d", \
- ++i, (int) be32_to_cpu(dqp->q_core.d_id), \
- DQFLAGTO_TYPESTR(dqp), \
- (int) be64_to_cpu(dqp->q_core.d_bcount), \
- (int) be64_to_cpu(dqp->q_core.d_icount), \
- (int) dqp->q_nrefs); } \
-}
-#else
-#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
-#endif
-
-/*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
-{
- xfs_dqhash_t *udqhash, *gdqhash;
- xfs_qm_t *xqm;
- uint i, hsize, flags = KM_SLEEP | KM_MAYFAIL;
-
- /*
- * Initialize the dquot hash tables.
- */
- hsize = XFS_QM_HASHSIZE_HIGH;
- while (!(udqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), flags))) {
- if ((hsize >>= 1) <= XFS_QM_HASHSIZE_LOW)
- flags = KM_SLEEP;
- }
- gdqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), KM_SLEEP);
- ndquot = hsize << 8;
-
- xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
- xqm->qm_dqhashmask = hsize - 1;
- xqm->qm_usr_dqhtable = udqhash;
- xqm->qm_grp_dqhtable = gdqhash;
- ASSERT(xqm->qm_usr_dqhtable != NULL);
- ASSERT(xqm->qm_grp_dqhtable != NULL);
-
- for (i = 0; i < hsize; i++) {
- xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
- xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
- }
-
- /*
- * Freelist of all dquots of all file systems
- */
- xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
-
- /*
- * dquot zone. we register our own low-memory callback.
- */
- if (!qm_dqzone) {
- xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
- "xfs_dquots");
- qm_dqzone = xqm->qm_dqzone;
- } else
- xqm->qm_dqzone = qm_dqzone;
-
- xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);
-
- /*
- * The t_dqinfo portion of transactions.
- */
- if (!qm_dqtrxzone) {
- xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
- "xfs_dqtrx");
- qm_dqtrxzone = xqm->qm_dqtrxzone;
- } else
- xqm->qm_dqtrxzone = qm_dqtrxzone;
-
- atomic_set(&xqm->qm_totaldquots, 0);
- xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
- xqm->qm_nrefs = 0;
-#ifdef DEBUG
- mutex_init(&qcheck_lock);
-#endif
- return xqm;
-}
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-STATIC void
-xfs_qm_destroy(
- struct xfs_qm *xqm)
-{
- int hsize, i;
-
- ASSERT(xqm != NULL);
- ASSERT(xqm->qm_nrefs == 0);
- kmem_shake_deregister(xfs_qm_shaker);
- hsize = xqm->qm_dqhashmask + 1;
- for (i = 0; i < hsize; i++) {
- xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
- xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
- }
- kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
- kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
- xqm->qm_usr_dqhtable = NULL;
- xqm->qm_grp_dqhtable = NULL;
- xqm->qm_dqhashmask = 0;
- xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
-#ifdef DEBUG
- mutex_destroy(&qcheck_lock);
-#endif
- kmem_free(xqm, sizeof(xfs_qm_t));
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
- struct xfs_mount *mp)
-{
- /*
- * Need to lock the xfs_Gqm structure for things like this. For example,
- * the structure could disappear between the entry to this routine and
- * a HOLD operation if not locked.
- */
- XFS_QM_LOCK(xfs_Gqm);
-
- if (xfs_Gqm == NULL)
- xfs_Gqm = xfs_Gqm_init();
- /*
- * We can keep a list of all filesystems with quotas mounted for
- * debugging and statistical purposes, but ...
- * Just take a reference and get out.
- */
- XFS_QM_HOLD(xfs_Gqm);
- XFS_QM_UNLOCK(xfs_Gqm);
-
- return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
- struct xfs_mount *mp)
-{
- xfs_dquot_t *dqp, *nextdqp;
-
- ASSERT(xfs_Gqm);
- ASSERT(xfs_Gqm->qm_nrefs > 0);
-
- /*
- * Go thru the freelist and destroy all inactive dquots.
- */
- xfs_qm_freelist_lock(xfs_Gqm);
-
- for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
- dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
- xfs_dqlock(dqp);
- nextdqp = dqp->dq_flnext;
- if (dqp->dq_flags & XFS_DQ_INACTIVE) {
- ASSERT(dqp->q_mount == NULL);
- ASSERT(! XFS_DQ_IS_DIRTY(dqp));
- ASSERT(dqp->HL_PREVP == NULL);
- ASSERT(dqp->MPL_PREVP == NULL);
- XQM_FREELIST_REMOVE(dqp);
- xfs_dqunlock(dqp);
- xfs_qm_dqdestroy(dqp);
- } else {
- xfs_dqunlock(dqp);
- }
- dqp = nextdqp;
- }
- xfs_qm_freelist_unlock(xfs_Gqm);
-
- /*
- * Destroy the entire XQM. If somebody mounts with quotaon, this'll
- * be restarted.
- */
- XFS_QM_LOCK(xfs_Gqm);
- XFS_QM_RELE(xfs_Gqm);
- if (xfs_Gqm->qm_nrefs == 0) {
- xfs_qm_destroy(xfs_Gqm);
- xfs_Gqm = NULL;
- }
- XFS_QM_UNLOCK(xfs_Gqm);
-}
-
-/*
- * This is called at mount time from xfs_mountfs to initialize the quotainfo
- * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
- * so already. Note that the superblock has not been read in yet.
- */
-void
-xfs_qm_mount_quotainit(
- xfs_mount_t *mp,
- uint flags)
-{
- /*
- * User, projects or group quotas has to be on.
- */
- ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA));
-
- /*
- * Initialize the flags in the mount structure. From this point
- * onwards we look at m_qflags to figure out if quotas's ON/OFF, etc.
- * Note that we enforce nothing if accounting is off.
- * ie. XFSMNT_*QUOTA must be ON for XFSMNT_*QUOTAENF.
- * It isn't necessary to take the quotaoff lock to do this; this is
- * called from mount.
- */
- if (flags & XFSMNT_UQUOTA) {
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- if (flags & XFSMNT_UQUOTAENF)
- mp->m_qflags |= XFS_UQUOTA_ENFD;
- }
- if (flags & XFSMNT_GQUOTA) {
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- if (flags & XFSMNT_GQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- } else if (flags & XFSMNT_PQUOTA) {
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- if (flags & XFSMNT_PQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- }
-}
-
-/*
- * Just destroy the quotainfo structure.
- */
-void
-xfs_qm_unmount_quotadestroy(
- xfs_mount_t *mp)
-{
- if (mp->m_quotainfo)
- xfs_qm_destroy_quotainfo(mp);
-}
-
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo. This is also responsible for
- * running a quotacheck as necessary. We are guaranteed that the superblock
- * is consistently read in at this point.
- */
-int
-xfs_qm_mount_quotas(
- xfs_mount_t *mp,
- int mfsi_flags)
-{
- unsigned long s;
- int error = 0;
- uint sbf;
-
-
- /*
- * If quotas on realtime volumes is not supported, we disable
- * quotas immediately.
- */
- if (mp->m_sb.sb_rextents) {
- cmn_err(CE_NOTE,
- "Cannot turn on quotas for realtime filesystem %s",
- mp->m_fsname);
- mp->m_qflags = 0;
- goto write_changes;
- }
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * Allocate the quotainfo structure inside the mount struct, and
- * create quotainode(s), and change/rev superblock if necessary.
- */
- if ((error = xfs_qm_init_quotainfo(mp))) {
- /*
- * We must turn off quotas.
- */
- ASSERT(mp->m_quotainfo == NULL);
- mp->m_qflags = 0;
- goto write_changes;
- }
- /*
- * If any of the quotas are not consistent, do a quotacheck.
- */
- if (XFS_QM_NEED_QUOTACHECK(mp) &&
- !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
- if ((error = xfs_qm_quotacheck(mp))) {
- /* Quotacheck has failed and quotas have
- * been disabled.
- */
- return XFS_ERROR(error);
- }
- }
-
- write_changes:
- /*
- * We actually don't have to acquire the SB_LOCK at all.
- * This can only be called from mount, and that's single threaded. XXX
- */
- s = XFS_SB_LOCK(mp);
- sbf = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
- XFS_SB_UNLOCK(mp, s);
-
- if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
- if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
- /*
- * We could only have been turning quotas off.
- * We aren't in very good shape actually because
- * the incore structures are convinced that quotas are
- * off, but the on disk superblock doesn't know that !
- */
- ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
- xfs_fs_cmn_err(CE_ALERT, mp,
- "XFS mount_quotas: Superblock update failed!");
- }
- }
-
- if (error) {
- xfs_fs_cmn_err(CE_WARN, mp,
- "Failed to initialize disk quotas.");
- }
- return XFS_ERROR(error);
-}
-
-/*
- * Called from the vfsops layer.
- */
-int
-xfs_qm_unmount_quotas(
- xfs_mount_t *mp)
-{
- xfs_inode_t *uqp, *gqp;
- int error = 0;
-
- /*
- * Release the dquots that root inode, et al might be holding,
- * before we flush quotas and blow away the quotainfo structure.
- */
- ASSERT(mp->m_rootip);
- xfs_qm_dqdetach(mp->m_rootip);
- if (mp->m_rbmip)
- xfs_qm_dqdetach(mp->m_rbmip);
- if (mp->m_rsumip)
- xfs_qm_dqdetach(mp->m_rsumip);
-
- /*
- * Flush out the quota inodes.
- */
- uqp = gqp = NULL;
- if (mp->m_quotainfo) {
- if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
- xfs_ilock(uqp, XFS_ILOCK_EXCL);
- xfs_iflock(uqp);
- error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
- xfs_iunlock(uqp, XFS_ILOCK_EXCL);
- if (unlikely(error == EFSCORRUPTED)) {
- XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
- XFS_ERRLEVEL_LOW, mp);
- goto out;
- }
- }
- if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
- xfs_ilock(gqp, XFS_ILOCK_EXCL);
- xfs_iflock(gqp);
- error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
- xfs_iunlock(gqp, XFS_ILOCK_EXCL);
- if (unlikely(error == EFSCORRUPTED)) {
- XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
- XFS_ERRLEVEL_LOW, mp);
- goto out;
- }
- }
- }
- if (uqp) {
- XFS_PURGE_INODE(uqp);
- mp->m_quotainfo->qi_uquotaip = NULL;
- }
- if (gqp) {
- XFS_PURGE_INODE(gqp);
- mp->m_quotainfo->qi_gquotaip = NULL;
- }
-out:
- return XFS_ERROR(error);
-}
-
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-STATIC int
-xfs_qm_dqflush_all(
- xfs_mount_t *mp,
- int flags)
-{
- int recl;
- xfs_dquot_t *dqp;
- int niters;
- int error;
-
- if (mp->m_quotainfo == NULL)
- return 0;
- niters = 0;
-again:
- xfs_qm_mplist_lock(mp);
- FOREACH_DQUOT_IN_MP(dqp, mp) {
- xfs_dqlock(dqp);
- if (! XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqunlock(dqp);
- continue;
- }
- xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
- /* XXX a sentinel would be better */
- recl = XFS_QI_MPLRECLAIMS(mp);
- if (! xfs_qm_dqflock_nowait(dqp)) {
- /*
- * If we can't grab the flush lock then check
- * to see if the dquot has been flushed delayed
- * write. If so, grab its buffer and send it
- * out immediately. We'll be able to acquire
- * the flush lock when the I/O completes.
- */
- xfs_qm_dqflock_pushbuf_wait(dqp);
- }
- /*
- * Let go of the mplist lock. We don't want to hold it
- * across a disk write.
- */
- xfs_qm_mplist_unlock(mp);
- error = xfs_qm_dqflush(dqp, flags);
- xfs_dqunlock(dqp);
- if (error)
- return error;
-
- xfs_qm_mplist_lock(mp);
- if (recl != XFS_QI_MPLRECLAIMS(mp)) {
- xfs_qm_mplist_unlock(mp);
- /* XXX restart limit */
- goto again;
- }
- }
-
- xfs_qm_mplist_unlock(mp);
- /* return ! busy */
- return 0;
-}
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
- xfs_mount_t *mp)
-{
- xfs_dquot_t *dqp, *gdqp;
- int nrecl;
-
- again:
- ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
- dqp = XFS_QI_MPLNEXT(mp);
- while (dqp) {
- xfs_dqlock(dqp);
- if ((gdqp = dqp->q_gdquot)) {
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
- xfs_dqunlock(dqp);
-
- if (gdqp) {
- /*
- * Can't hold the mplist lock across a dqput.
- * XXXmust convert to marker based iterations here.
- */
- nrecl = XFS_QI_MPLRECLAIMS(mp);
- xfs_qm_mplist_unlock(mp);
- xfs_qm_dqput(gdqp);
-
- xfs_qm_mplist_lock(mp);
- if (nrecl != XFS_QI_MPLRECLAIMS(mp))
- goto again;
- }
- dqp = dqp->MPL_NEXT;
- }
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-STATIC int
-xfs_qm_dqpurge_int(
- xfs_mount_t *mp,
- uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
-{
- xfs_dquot_t *dqp;
- uint dqtype;
- int nrecl;
- xfs_dquot_t *nextdqp;
- int nmisses;
-
- if (mp->m_quotainfo == NULL)
- return 0;
-
- dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
- dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
- dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
- xfs_qm_mplist_lock(mp);
-
- /*
- * In the first pass through all incore dquots of this filesystem,
- * we release the group dquot pointers the user dquots may be
- * carrying around as a hint. We need to do this irrespective of
- * what's being turned off.
- */
- xfs_qm_detach_gdquots(mp);
-
- again:
- nmisses = 0;
- ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
- /*
- * Try to get rid of all of the unwanted dquots. The idea is to
- * get them off mplist and hashlist, but leave them on freelist.
- */
- dqp = XFS_QI_MPLNEXT(mp);
- while (dqp) {
- /*
- * It's OK to look at the type without taking dqlock here.
- * We're holding the mplist lock here, and that's needed for
- * a dqreclaim.
- */
- if ((dqp->dq_flags & dqtype) == 0) {
- dqp = dqp->MPL_NEXT;
- continue;
- }
-
- if (! xfs_qm_dqhashlock_nowait(dqp)) {
- nrecl = XFS_QI_MPLRECLAIMS(mp);
- xfs_qm_mplist_unlock(mp);
- XFS_DQ_HASH_LOCK(dqp->q_hash);
- xfs_qm_mplist_lock(mp);
-
- /*
- * XXXTheoretically, we can get into a very long
- * ping pong game here.
- * No one can be adding dquots to the mplist at
- * this point, but somebody might be taking things off.
- */
- if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
- XFS_DQ_HASH_UNLOCK(dqp->q_hash);
- goto again;
- }
- }
-
- /*
- * Take the dquot off the mplist and hashlist. It may remain on
- * freelist in INACTIVE state.
- */
- nextdqp = dqp->MPL_NEXT;
- nmisses += xfs_qm_dqpurge(dqp, flags);
- dqp = nextdqp;
- }
- xfs_qm_mplist_unlock(mp);
- return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
- xfs_mount_t *mp,
- uint flags)
-{
- int ndquots;
-
- /*
- * Purge the dquot cache.
- * None of the dquots should really be busy at this point.
- */
- if (mp->m_quotainfo) {
- while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
- delay(ndquots * 10);
- }
- }
- return 0;
-}
-
-STATIC int
-xfs_qm_dqattach_one(
- xfs_inode_t *ip,
- xfs_dqid_t id,
- uint type,
- uint doalloc,
- uint dolock,
- xfs_dquot_t *udqhint, /* hint */
- xfs_dquot_t **IO_idqpp)
-{
- xfs_dquot_t *dqp;
- int error;
-
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
- error = 0;
- /*
- * See if we already have it in the inode itself. IO_idqpp is
- * &i_udquot or &i_gdquot. This made the code look weird, but
- * made the logic a lot simpler.
- */
- if ((dqp = *IO_idqpp)) {
- if (dolock)
- xfs_dqlock(dqp);
- xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
- goto done;
- }
-
- /*
- * udqhint is the i_udquot field in inode, and is non-NULL only
- * when the type arg is group/project. Its purpose is to save a
- * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
- * the user dquot.
- */
- ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
- if (udqhint && !dolock)
- xfs_dqlock(udqhint);
-
- /*
- * No need to take dqlock to look at the id.
- * The ID can't change until it gets reclaimed, and it won't
- * be reclaimed as long as we have a ref from inode and we hold
- * the ilock.
- */
- if (udqhint &&
- (dqp = udqhint->q_gdquot) &&
- (be32_to_cpu(dqp->q_core.d_id) == id)) {
- ASSERT(XFS_DQ_IS_LOCKED(udqhint));
- xfs_dqlock(dqp);
- XFS_DQHOLD(dqp);
- ASSERT(*IO_idqpp == NULL);
- *IO_idqpp = dqp;
- if (!dolock) {
- xfs_dqunlock(dqp);
- xfs_dqunlock(udqhint);
- }
- goto done;
- }
- /*
- * We can't hold a dquot lock when we call the dqget code.
- * We'll deadlock in no time, because of (not conforming to)
- * lock ordering - the inodelock comes before any dquot lock,
- * and we may drop and reacquire the ilock in xfs_qm_dqget().
- */
- if (udqhint)
- xfs_dqunlock(udqhint);
- /*
- * Find the dquot from somewhere. This bumps the
- * reference count of dquot and returns it locked.
- * This can return ENOENT if dquot didn't exist on
- * disk and we didn't ask it to allocate;
- * ESRCH if quotas got turned off suddenly.
- */
- if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
- doalloc|XFS_QMOPT_DOWARN, &dqp))) {
- if (udqhint && dolock)
- xfs_dqlock(udqhint);
- goto done;
- }
-
- xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
- /*
- * dqget may have dropped and re-acquired the ilock, but it guarantees
- * that the dquot returned is the one that should go in the inode.
- */
- *IO_idqpp = dqp;
- ASSERT(dqp);
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (! dolock) {
- xfs_dqunlock(dqp);
- goto done;
- }
- if (! udqhint)
- goto done;
-
- ASSERT(udqhint);
- ASSERT(dolock);
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (! xfs_qm_dqlock_nowait(udqhint)) {
- xfs_dqunlock(dqp);
- xfs_dqlock(udqhint);
- xfs_dqlock(dqp);
- }
- done:
-#ifdef QUOTADEBUG
- if (udqhint) {
- if (dolock)
- ASSERT(XFS_DQ_IS_LOCKED(udqhint));
- }
- if (! error) {
- if (dolock)
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- }
-#endif
- return error;
-}
-
-
-/*
- * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering contraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
- */
-STATIC void
-xfs_qm_dqattach_grouphint(
- xfs_dquot_t *udq,
- xfs_dquot_t *gdq,
- uint locked)
-{
- xfs_dquot_t *tmp;
-
-#ifdef QUOTADEBUG
- if (locked) {
- ASSERT(XFS_DQ_IS_LOCKED(udq));
- ASSERT(XFS_DQ_IS_LOCKED(gdq));
- }
-#endif
- if (! locked)
- xfs_dqlock(udq);
-
- if ((tmp = udq->q_gdquot)) {
- if (tmp == gdq) {
- if (! locked)
- xfs_dqunlock(udq);
- return;
- }
-
- udq->q_gdquot = NULL;
- /*
- * We can't keep any dqlocks when calling dqrele,
- * because the freelist lock comes before dqlocks.
- */
- xfs_dqunlock(udq);
- if (locked)
- xfs_dqunlock(gdq);
- /*
- * we took a hard reference once upon a time in dqget,
- * so give it back when the udquot no longer points at it
- * dqput() does the unlocking of the dquot.
- */
- xfs_qm_dqrele(tmp);
-
- xfs_dqlock(udq);
- xfs_dqlock(gdq);
-
- } else {
- ASSERT(XFS_DQ_IS_LOCKED(udq));
- if (! locked) {
- xfs_dqlock(gdq);
- }
- }
-
- ASSERT(XFS_DQ_IS_LOCKED(udq));
- ASSERT(XFS_DQ_IS_LOCKED(gdq));
- /*
- * Somebody could have attached a gdquot here,
- * when we dropped the uqlock. If so, just do nothing.
- */
- if (udq->q_gdquot == NULL) {
- XFS_DQHOLD(gdq);
- udq->q_gdquot = gdq;
- }
- if (! locked) {
- xfs_dqunlock(gdq);
- xfs_dqunlock(udq);
- }
-}
-
-
-/*
- * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
- * into account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
- * much made this code a complete mess, but it has been pretty useful.
- * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
- * Inode may get unlocked and relocked in here, and the caller must deal with
- * the consequences.
- */
-int
-xfs_qm_dqattach(
- xfs_inode_t *ip,
- uint flags)
-{
- xfs_mount_t *mp = ip->i_mount;
- uint nquotas = 0;
- int error = 0;
-
- if ((! XFS_IS_QUOTA_ON(mp)) ||
- (! XFS_NOT_DQATTACHED(mp, ip)) ||
- (ip->i_ino == mp->m_sb.sb_uquotino) ||
- (ip->i_ino == mp->m_sb.sb_gquotino))
- return 0;
-
- ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
- XFS_ISLOCKED_INODE_EXCL(ip));
-
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- if (XFS_IS_UQUOTA_ON(mp)) {
- error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
- flags & XFS_QMOPT_DQALLOC,
- flags & XFS_QMOPT_DQLOCK,
- NULL, &ip->i_udquot);
- if (error)
- goto done;
- nquotas++;
- }
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
- if (XFS_IS_OQUOTA_ON(mp)) {
- error = XFS_IS_GQUOTA_ON(mp) ?
- xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
- flags & XFS_QMOPT_DQALLOC,
- flags & XFS_QMOPT_DQLOCK,
- ip->i_udquot, &ip->i_gdquot) :
- xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
- flags & XFS_QMOPT_DQALLOC,
- flags & XFS_QMOPT_DQLOCK,
- ip->i_udquot, &ip->i_gdquot);
- /*
- * Don't worry about the udquot that we may have
- * attached above. It'll get detached, if not already.
- */
- if (error)
- goto done;
- nquotas++;
- }
-
- /*
- * Attach this group quota to the user quota as a hint.
- * This WON'T, in general, result in a thrash.
- */
- if (nquotas == 2) {
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
- ASSERT(ip->i_udquot);
- ASSERT(ip->i_gdquot);
-
- /*
- * We may or may not have the i_udquot locked at this point,
- * but this check is OK since we don't depend on the i_gdquot to
- * be accurate 100% all the time. It is just a hint, and this
- * will succeed in general.
- */
- if (ip->i_udquot->q_gdquot == ip->i_gdquot)
- goto done;
- /*
- * Attach i_gdquot to the gdquot hint inside the i_udquot.
- */
- xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
- flags & XFS_QMOPT_DQLOCK);
- }
-
- done:
-
-#ifdef QUOTADEBUG
- if (! error) {
- if (ip->i_udquot) {
- if (flags & XFS_QMOPT_DQLOCK)
- ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
- }
- if (ip->i_gdquot) {
- if (flags & XFS_QMOPT_DQLOCK)
- ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
- }
- if (XFS_IS_UQUOTA_ON(mp))
- ASSERT(ip->i_udquot);
- if (XFS_IS_OQUOTA_ON(mp))
- ASSERT(ip->i_gdquot);
- }
-#endif
-
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-#ifdef QUOTADEBUG
- else
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-#endif
- return error;
-}
-
-/*
- * Release dquots (and their references) if any.
- * The inode should be locked EXCL except when this's called by
- * xfs_ireclaim.
- */
-void
-xfs_qm_dqdetach(
- xfs_inode_t *ip)
-{
- if (!(ip->i_udquot || ip->i_gdquot))
- return;
-
- ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
- ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
- if (ip->i_udquot) {
- xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
- xfs_qm_dqrele(ip->i_udquot);
- ip->i_udquot = NULL;
- }
- if (ip->i_gdquot) {
- xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
- xfs_qm_dqrele(ip->i_gdquot);
- ip->i_gdquot = NULL;
- }
-}
-
-/*
- * This is called by VFS_SYNC and flags arg determines the caller,
- * and its motives, as done in xfs_sync.
- *
- * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
- * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
- * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
- */
-
-int
-xfs_qm_sync(
- xfs_mount_t *mp,
- short flags)
-{
- int recl, restarts;
- xfs_dquot_t *dqp;
- uint flush_flags;
- boolean_t nowait;
- int error;
-
- restarts = 0;
- /*
- * We won't block unless we are asked to.
- */
- nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
-
- again:
- xfs_qm_mplist_lock(mp);
- /*
- * dqpurge_all() also takes the mplist lock and iterate thru all dquots
- * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
- * when we have the mplist lock, we know that dquots will be consistent
- * as long as we have it locked.
- */
- if (! XFS_IS_QUOTA_ON(mp)) {
- xfs_qm_mplist_unlock(mp);
- return 0;
- }
- FOREACH_DQUOT_IN_MP(dqp, mp) {
- /*
- * If this is vfs_sync calling, then skip the dquots that
- * don't 'seem' to be dirty. ie. don't acquire dqlock.
- * This is very similar to what xfs_sync does with inodes.
- */
- if (flags & SYNC_BDFLUSH) {
- if (! XFS_DQ_IS_DIRTY(dqp))
- continue;
- }
-
- if (nowait) {
- /*
- * Try to acquire the dquot lock. We are NOT out of
- * lock order, but we just don't want to wait for this
- * lock, unless somebody wanted us to.
- */
- if (! xfs_qm_dqlock_nowait(dqp))
- continue;
- } else {
- xfs_dqlock(dqp);
- }
-
- /*
- * Now, find out for sure if this dquot is dirty or not.
- */
- if (! XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqunlock(dqp);
- continue;
- }
-
- /* XXX a sentinel would be better */
- recl = XFS_QI_MPLRECLAIMS(mp);
- if (! xfs_qm_dqflock_nowait(dqp)) {
- if (nowait) {
- xfs_dqunlock(dqp);
- continue;
- }
- /*
- * If we can't grab the flush lock then if the caller
- * really wanted us to give this our best shot,
- * see if we can give a push to the buffer before we wait
- * on the flush lock. At this point, we know that
- * eventhough the dquot is being flushed,
- * it has (new) dirty data.
- */
- xfs_qm_dqflock_pushbuf_wait(dqp);
- }
- /*
- * Let go of the mplist lock. We don't want to hold it
- * across a disk write
- */
- flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
- xfs_qm_mplist_unlock(mp);
- xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
- error = xfs_qm_dqflush(dqp, flush_flags);
- xfs_dqunlock(dqp);
- if (error && XFS_FORCED_SHUTDOWN(mp))
- return 0; /* Need to prevent umount failure */
- else if (error)
- return error;
-
- xfs_qm_mplist_lock(mp);
- if (recl != XFS_QI_MPLRECLAIMS(mp)) {
- if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
- break;
-
- xfs_qm_mplist_unlock(mp);
- goto again;
- }
- }
-
- xfs_qm_mplist_unlock(mp);
- return 0;
-}
-
-
-/*
- * This initializes all the quota information that's kept in the
- * mount structure
- */
-STATIC int
-xfs_qm_init_quotainfo(
- xfs_mount_t *mp)
-{
- xfs_quotainfo_t *qinf;
- int error;
- xfs_dquot_t *dqp;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * Tell XQM that we exist as soon as possible.
- */
- if ((error = xfs_qm_hold_quotafs_ref(mp))) {
- return error;
- }
-
- qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-
- /*
- * See if quotainodes are setup, and if not, allocate them,
- * and change the superblock accordingly.
- */
- if ((error = xfs_qm_init_quotainos(mp))) {
- kmem_free(qinf, sizeof(xfs_quotainfo_t));
- mp->m_quotainfo = NULL;
- return error;
- }
-
- spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
- xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
- qinf->qi_dqreclaims = 0;
-
- /* mutex used to serialize quotaoffs */
- mutex_init(&qinf->qi_quotaofflock);
-
- /* Precalc some constants */
- qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
- ASSERT(qinf->qi_dqchunklen);
- qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
- do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
-
- mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
-
- /*
- * We try to get the limits from the superuser's limits fields.
- * This is quite hacky, but it is standard quota practice.
- * We look at the USR dquot with id == 0 first, but if user quotas
- * are not enabled we goto the GRP dquot with id == 0.
- * We don't really care to keep separate default limits for user
- * and group quotas, at least not at this point.
- */
- error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
- XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
- (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
- XFS_DQ_PROJ),
- XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
- &dqp);
- if (! error) {
- xfs_disk_dquot_t *ddqp = &dqp->q_core;
-
- /*
- * The warnings and timers set the grace period given to
- * a user or group before he or she can not perform any
- * more writing. If it is zero, a default is used.
- */
- qinf->qi_btimelimit = ddqp->d_btimer ?
- be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
- qinf->qi_itimelimit = ddqp->d_itimer ?
- be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
- qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
- be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
- qinf->qi_bwarnlimit = ddqp->d_bwarns ?
- be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
- qinf->qi_iwarnlimit = ddqp->d_iwarns ?
- be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
- qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
- be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
- qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
- qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
- qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
- qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
- qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
- qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-
- /*
- * We sent the XFS_QMOPT_DQSUSER flag to dqget because
- * we don't want this dquot cached. We haven't done a
- * quotacheck yet, and quotacheck doesn't like incore dquots.
- */
- xfs_qm_dqdestroy(dqp);
- } else {
- qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
- qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
- qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
- qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
- qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
- qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
- }
-
- return 0;
-}
-
-
-/*
- * Gets called when unmounting a filesystem or when all quotas get
- * turned off.
- * This purges the quota inodes, destroys locks and frees itself.
- */
-void
-xfs_qm_destroy_quotainfo(
- xfs_mount_t *mp)
-{
- xfs_quotainfo_t *qi;
-
- qi = mp->m_quotainfo;
- ASSERT(qi != NULL);
- ASSERT(xfs_Gqm != NULL);
-
- /*
- * Release the reference that XQM kept, so that we know
- * when the XQM structure should be freed. We cannot assume
- * that xfs_Gqm is non-null after this point.
- */
- xfs_qm_rele_quotafs_ref(mp);
-
- spinlock_destroy(&qi->qi_pinlock);
- xfs_qm_list_destroy(&qi->qi_dqlist);
-
- if (qi->qi_uquotaip) {
- XFS_PURGE_INODE(qi->qi_uquotaip);
- qi->qi_uquotaip = NULL; /* paranoia */
- }
- if (qi->qi_gquotaip) {
- XFS_PURGE_INODE(qi->qi_gquotaip);
- qi->qi_gquotaip = NULL;
- }
- mutex_destroy(&qi->qi_quotaofflock);
- kmem_free(qi, sizeof(xfs_quotainfo_t));
- mp->m_quotainfo = NULL;
-}
-
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
- xfs_dqlist_t *list,
- char *str,
- int n)
-{
- mutex_init(&list->qh_lock);
- list->qh_next = NULL;
- list->qh_version = 0;
- list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
- xfs_dqlist_t *list)
-{
- mutex_destroy(&(list->qh_lock));
-}
-
-
-/*
- * Stripped down version of dqattach. This doesn't attach, or even look at the
- * dquots attached to the inode. The rationale is that there won't be any
- * attached at the time this is called from quotacheck.
- */
-STATIC int
-xfs_qm_dqget_noattach(
- xfs_inode_t *ip,
- xfs_dquot_t **O_udqpp,
- xfs_dquot_t **O_gdqpp)
-{
- int error;
- xfs_mount_t *mp;
- xfs_dquot_t *udqp, *gdqp;
-
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
- mp = ip->i_mount;
- udqp = NULL;
- gdqp = NULL;
-
- if (XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(ip->i_udquot == NULL);
- /*
- * We want the dquot allocated if it doesn't exist.
- */
- if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
- XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
- &udqp))) {
- /*
- * Shouldn't be able to turn off quotas here.
- */
- ASSERT(error != ESRCH);
- ASSERT(error != ENOENT);
- return error;
- }
- ASSERT(udqp);
- }
-
- if (XFS_IS_OQUOTA_ON(mp)) {
- ASSERT(ip->i_gdquot == NULL);
- if (udqp)
- xfs_dqunlock(udqp);
- error = XFS_IS_GQUOTA_ON(mp) ?
- xfs_qm_dqget(mp, ip,
- ip->i_d.di_gid, XFS_DQ_GROUP,
- XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
- &gdqp) :
- xfs_qm_dqget(mp, ip,
- ip->i_d.di_projid, XFS_DQ_PROJ,
- XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
- &gdqp);
- if (error) {
- if (udqp)
- xfs_qm_dqrele(udqp);
- ASSERT(error != ESRCH);
- ASSERT(error != ENOENT);
- return error;
- }
- ASSERT(gdqp);
-
- /* Reacquire the locks in the right order */
- if (udqp) {
- if (! xfs_qm_dqlock_nowait(udqp)) {
- xfs_dqunlock(gdqp);
- xfs_dqlock(udqp);
- xfs_dqlock(gdqp);
- }
- }
- }
-
- *O_udqpp = udqp;
- *O_gdqpp = gdqp;
-
-#ifdef QUOTADEBUG
- if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
- if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
-#endif
- return 0;
-}
-
-/*
- * Create an inode and return with a reference already taken, but unlocked
- * This is how we create quota inodes
- */
-STATIC int
-xfs_qm_qino_alloc(
- xfs_mount_t *mp,
- xfs_inode_t **ip,
- __int64_t sbfields,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- unsigned long s;
- cred_t zerocr;
- xfs_inode_t zeroino;
- int committed;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
- if ((error = xfs_trans_reserve(tp,
- XFS_QM_QINOCREATE_SPACE_RES(mp),
- XFS_CREATE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_CREATE_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
- memset(&zerocr, 0, sizeof(zerocr));
- memset(&zeroino, 0, sizeof(zeroino));
-
- if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0,
- &zerocr, 0, 1, ip, &committed))) {
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT);
- return error;
- }
-
- /*
- * Keep an extra reference to this quota inode. This inode is
- * locked exclusively and joined to the transaction already.
- */
- ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
- VN_HOLD(XFS_ITOV((*ip)));
-
- /*
- * Make the changes in the superblock, and log those too.
- * sbfields arg may contain fields other than *QUOTINO;
- * VERSIONNUM for example.
- */
- s = XFS_SB_LOCK(mp);
- if (flags & XFS_QMOPT_SBVERSION) {
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
- unsigned oldv = mp->m_sb.sb_versionnum;
-#endif
- ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
- ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
- XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
- (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
- XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
-
- XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
- mp->m_sb.sb_uquotino = NULLFSINO;
- mp->m_sb.sb_gquotino = NULLFSINO;
-
- /* qflags will get updated _after_ quotacheck */
- mp->m_sb.sb_qflags = 0;
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
- cmn_err(CE_NOTE,
- "Old superblock version %x, converting to %x.",
- oldv, mp->m_sb.sb_versionnum);
-#endif
- }
- if (flags & XFS_QMOPT_UQUOTA)
- mp->m_sb.sb_uquotino = (*ip)->i_ino;
- else
- mp->m_sb.sb_gquotino = (*ip)->i_ino;
- XFS_SB_UNLOCK(mp, s);
- xfs_mod_sb(tp, sbfields);
-
- if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
- NULL))) {
- xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
- return error;
- }
- return 0;
-}
-
-
-STATIC int
-xfs_qm_reset_dqcounts(
- xfs_mount_t *mp,
- xfs_buf_t *bp,
- xfs_dqid_t id,
- uint type)
-{
- xfs_disk_dquot_t *ddq;
- int j;
-
- xfs_buftrace("RESET DQUOTS", bp);
- /*
- * Reset all counters and timers. They'll be
- * started afresh by xfs_qm_quotacheck.
- */
-#ifdef DEBUG
- j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
- do_div(j, sizeof(xfs_dqblk_t));
- ASSERT(XFS_QM_DQPERBLK(mp) == j);
-#endif
- ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
- for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
- /*
- * Do a sanity check, and if needed, repair the dqblk. Don't
- * output any warnings because it's perfectly possible to
- * find unitialized dquot blks. See comment in xfs_qm_dqcheck.
- */
- (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
- "xfs_quotacheck");
- ddq->d_bcount = 0;
- ddq->d_icount = 0;
- ddq->d_rtbcount = 0;
- ddq->d_btimer = 0;
- ddq->d_itimer = 0;
- ddq->d_rtbtimer = 0;
- ddq->d_bwarns = 0;
- ddq->d_iwarns = 0;
- ddq->d_rtbwarns = 0;
- ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
- }
-
- return 0;
-}
-
-STATIC int
-xfs_qm_dqiter_bufs(
- xfs_mount_t *mp,
- xfs_dqid_t firstid,
- xfs_fsblock_t bno,
- xfs_filblks_t blkcnt,
- uint flags)
-{
- xfs_buf_t *bp;
- int error;
- int notcommitted;
- int incr;
- int type;
-
- ASSERT(blkcnt > 0);
- notcommitted = 0;
- incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
- XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
- type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
- (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
- error = 0;
-
- /*
- * Blkcnt arg can be a very big number, and might even be
- * larger than the log itself. So, we have to break it up into
- * manageable-sized transactions.
- * Note that we don't start a permanent transaction here; we might
- * not be able to get a log reservation for the whole thing up front,
- * and we don't really care to either, because we just discard
- * everything if we were to crash in the middle of this loop.
- */
- while (blkcnt--) {
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, bno),
- (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
- if (error)
- break;
-
- (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type);
- xfs_bdwrite(mp, bp);
- /*
- * goto the next block.
- */
- bno++;
- firstid += XFS_QM_DQPERBLK(mp);
- }
- return error;
-}
-
-/*
- * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
- */
-STATIC int
-xfs_qm_dqiterate(
- xfs_mount_t *mp,
- xfs_inode_t *qip,
- uint flags)
-{
- xfs_bmbt_irec_t *map;
- int i, nmaps; /* number of map entries */
- int error; /* return value */
- xfs_fileoff_t lblkno;
- xfs_filblks_t maxlblkcnt;
- xfs_dqid_t firstid;
- xfs_fsblock_t rablkno;
- xfs_filblks_t rablkcnt;
-
- error = 0;
- /*
- * This looks racey, but we can't keep an inode lock across a
- * trans_reserve. But, this gets called during quotacheck, and that
- * happens only at mount time which is single threaded.
- */
- if (qip->i_d.di_nblocks == 0)
- return 0;
-
- map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
-
- lblkno = 0;
- maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
- do {
- nmaps = XFS_DQITER_MAP_SIZE;
- /*
- * We aren't changing the inode itself. Just changing
- * some of its data. No new blocks are added here, and
- * the inode is never added to the transaction.
- */
- xfs_ilock(qip, XFS_ILOCK_SHARED);
- error = xfs_bmapi(NULL, qip, lblkno,
- maxlblkcnt - lblkno,
- XFS_BMAPI_METADATA,
- NULL,
- 0, map, &nmaps, NULL);
- xfs_iunlock(qip, XFS_ILOCK_SHARED);
- if (error)
- break;
-
- ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
- for (i = 0; i < nmaps; i++) {
- ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
- ASSERT(map[i].br_blockcount);
-
-
- lblkno += map[i].br_blockcount;
-
- if (map[i].br_startblock == HOLESTARTBLOCK)
- continue;
-
- firstid = (xfs_dqid_t) map[i].br_startoff *
- XFS_QM_DQPERBLK(mp);
- /*
- * Do a read-ahead on the next extent.
- */
- if ((i+1 < nmaps) &&
- (map[i+1].br_startblock != HOLESTARTBLOCK)) {
- rablkcnt = map[i+1].br_blockcount;
- rablkno = map[i+1].br_startblock;
- while (rablkcnt--) {
- xfs_baread(mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, rablkno),
- (int)XFS_QI_DQCHUNKLEN(mp));
- rablkno++;
- }
- }
- /*
- * Iterate thru all the blks in the extent and
- * reset the counters of all the dquots inside them.
- */
- if ((error = xfs_qm_dqiter_bufs(mp,
- firstid,
- map[i].br_startblock,
- map[i].br_blockcount,
- flags))) {
- break;
- }
- }
-
- if (error)
- break;
- } while (nmaps > 0);
-
- kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
-
- return error;
-}
-
-/*
- * Called by dqusage_adjust in doing a quotacheck.
- * Given the inode, and a dquot (either USR or GRP, doesn't matter),
- * this updates its incore copy as well as the buffer copy. This is
- * so that once the quotacheck is done, we can just log all the buffers,
- * as opposed to logging numerous updates to individual dquots.
- */
-STATIC void
-xfs_qm_quotacheck_dqadjust(
- xfs_dquot_t *dqp,
- xfs_qcnt_t nblks,
- xfs_qcnt_t rtblks)
-{
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
- /*
- * Adjust the inode count and the block count to reflect this inode's
- * resource usage.
- */
- be64_add(&dqp->q_core.d_icount, 1);
- dqp->q_res_icount++;
- if (nblks) {
- be64_add(&dqp->q_core.d_bcount, nblks);
- dqp->q_res_bcount += nblks;
- }
- if (rtblks) {
- be64_add(&dqp->q_core.d_rtbcount, rtblks);
- dqp->q_res_rtbcount += rtblks;
- }
-
- /*
- * Set default limits, adjust timers (since we changed usages)
- */
- if (! XFS_IS_SUSER_DQUOT(dqp)) {
- xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
- xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
- }
-
- dqp->dq_flags |= XFS_DQ_DIRTY;
-}
-
-STATIC int
-xfs_qm_get_rtblks(
- xfs_inode_t *ip,
- xfs_qcnt_t *O_rtblks)
-{
- xfs_filblks_t rtblks; /* total rt blks */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extnum_t nextents; /* number of extent entries */
- xfs_bmbt_rec_t *base; /* base of extent array */
- xfs_bmbt_rec_t *ep; /* pointer to an extent entry */
- int error;
-
- ASSERT(XFS_IS_REALTIME_INODE(ip));
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
- if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
- return error;
- }
- rtblks = 0;
- nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
- base = &ifp->if_u1.if_extents[0];
- for (ep = base; ep < &base[nextents]; ep++)
- rtblks += xfs_bmbt_get_blockcount(ep);
- *O_rtblks = (xfs_qcnt_t)rtblks;
- return 0;
-}
-
-/*
- * callback routine supplied to bulkstat(). Given an inumber, find its
- * dquots and update them to account for resources taken by that inode.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqusage_adjust(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* not used */
- int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
- int *ubused, /* not used */
- void *dip, /* on-disk inode pointer (not used) */
- int *res) /* result code value */
-{
- xfs_inode_t *ip;
- xfs_dquot_t *udqp, *gdqp;
- xfs_qcnt_t nblks, rtblks;
- int error;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * rootino must have its resources accounted for, not so with the quota
- * inodes.
- */
- if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
- *res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(EINVAL);
- }
-
- /*
- * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
- * interface expects the inode to be exclusively locked because that's
- * the case in all other instances. It's OK that we do this because
- * quotacheck is done only at mount time.
- */
- if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
- *res = BULKSTAT_RV_NOTHING;
- return error;
- }
-
- if (ip->i_d.di_mode == 0) {
- xfs_iput_new(ip, XFS_ILOCK_EXCL);
- *res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(ENOENT);
- }
-
- /*
- * Obtain the locked dquots. In case of an error (eg. allocation
- * fails for ENOSPC), we return the negative of the error number
- * to bulkstat, so that it can get propagated to quotacheck() and
- * making us disable quotas for the file system.
- */
- if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
- xfs_iput(ip, XFS_ILOCK_EXCL);
- *res = BULKSTAT_RV_GIVEUP;
- return error;
- }
-
- rtblks = 0;
- if (! XFS_IS_REALTIME_INODE(ip)) {
- nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
- } else {
- /*
- * Walk thru the extent list and count the realtime blocks.
- */
- if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
- xfs_iput(ip, XFS_ILOCK_EXCL);
- if (udqp)
- xfs_qm_dqput(udqp);
- if (gdqp)
- xfs_qm_dqput(gdqp);
- *res = BULKSTAT_RV_GIVEUP;
- return error;
- }
- nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
- }
- ASSERT(ip->i_delayed_blks == 0);
-
- /*
- * We can't release the inode while holding its dquot locks.
- * The inode can go into inactive and might try to acquire the dquotlocks.
- * So, just unlock here and do a vn_rele at the end.
- */
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- /*
- * Add the (disk blocks and inode) resources occupied by this
- * inode to its dquots. We do this adjustment in the incore dquot,
- * and also copy the changes to its buffer.
- * We don't care about putting these changes in a transaction
- * envelope because if we crash in the middle of a 'quotacheck'
- * we have to start from the beginning anyway.
- * Once we're done, we'll log all the dquot bufs.
- *
- * The *QUOTA_ON checks below may look pretty racey, but quotachecks
- * and quotaoffs don't race. (Quotachecks happen at mount time only).
- */
- if (XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(udqp);
- xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
- xfs_qm_dqput(udqp);
- }
- if (XFS_IS_OQUOTA_ON(mp)) {
- ASSERT(gdqp);
- xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
- xfs_qm_dqput(gdqp);
- }
- /*
- * Now release the inode. This will send it to 'inactive', and
- * possibly even free blocks.
- */
- VN_RELE(XFS_ITOV(ip));
-
- /*
- * Goto next inode.
- */
- *res = BULKSTAT_RV_DIDONE;
- return 0;
-}
-
-/*
- * Walk thru all the filesystem inodes and construct a consistent view
- * of the disk quota world. If the quotacheck fails, disable quotas.
- */
-int
-xfs_qm_quotacheck(
- xfs_mount_t *mp)
-{
- int done, count, error;
- xfs_ino_t lastino;
- size_t structsz;
- xfs_inode_t *uip, *gip;
- uint flags;
-
- count = INT_MAX;
- structsz = 1;
- lastino = 0;
- flags = 0;
-
- ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * There should be no cached dquots. The (simplistic) quotacheck
- * algorithm doesn't like that.
- */
- ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
-
- cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
-
- /*
- * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
- * their counters to zero. We need a clean slate.
- * We don't log our changes till later.
- */
- if ((uip = XFS_QI_UQIP(mp))) {
- if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
- goto error_return;
- flags |= XFS_UQUOTA_CHKD;
- }
-
- if ((gip = XFS_QI_GQIP(mp))) {
- if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
- XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
- goto error_return;
- flags |= XFS_OQUOTA_CHKD;
- }
-
- do {
- /*
- * Iterate thru all the inodes in the file system,
- * adjusting the corresponding dquot counters in core.
- */
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust, NULL,
- structsz, NULL,
- BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
- &done)))
- break;
-
- } while (! done);
-
- /*
- * We can get this error if we couldn't do a dquot allocation inside
- * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
- * dirty dquots that might be cached, we just want to get rid of them
- * and turn quotaoff. The dquots won't be attached to any of the inodes
- * at this point (because we intentionally didn't in dqget_noattach).
- */
- if (error) {
- xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
- goto error_return;
- }
- /*
- * We've made all the changes that we need to make incore.
- * Now flush_them down to disk buffers.
- */
- xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
-
- /*
- * We didn't log anything, because if we crashed, we'll have to
- * start the quotacheck from scratch anyway. However, we must make
- * sure that our dquot changes are secure before we put the
- * quotacheck'd stamp on the superblock. So, here we do a synchronous
- * flush.
- */
- XFS_bflush(mp->m_ddev_targp);
-
- /*
- * If one type of quotas is off, then it will lose its
- * quotachecked status, since we won't be doing accounting for
- * that type anymore.
- */
- mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
- mp->m_qflags |= flags;
-
- XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
-
- error_return:
- if (error) {
- cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
- "Disabling quotas.",
- mp->m_fsname, error);
- /*
- * We must turn off quotas.
- */
- ASSERT(mp->m_quotainfo != NULL);
- ASSERT(xfs_Gqm != NULL);
- xfs_qm_destroy_quotainfo(mp);
- (void)xfs_mount_reset_sbqflags(mp);
- } else {
- cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
- }
- return (error);
-}
-
-/*
- * This is called after the superblock has been read in and we're ready to
- * iget the quota inodes.
- */
-STATIC int
-xfs_qm_init_quotainos(
- xfs_mount_t *mp)
-{
- xfs_inode_t *uip, *gip;
- int error;
- __int64_t sbflags;
- uint flags;
-
- ASSERT(mp->m_quotainfo);
- uip = gip = NULL;
- sbflags = 0;
- flags = 0;
-
- /*
- * Get the uquota and gquota inodes
- */
- if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
- if (XFS_IS_UQUOTA_ON(mp) &&
- mp->m_sb.sb_uquotino != NULLFSINO) {
- ASSERT(mp->m_sb.sb_uquotino > 0);
- if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0)))
- return XFS_ERROR(error);
- }
- if (XFS_IS_OQUOTA_ON(mp) &&
- mp->m_sb.sb_gquotino != NULLFSINO) {
- ASSERT(mp->m_sb.sb_gquotino > 0);
- if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0))) {
- if (uip)
- VN_RELE(XFS_ITOV(uip));
- return XFS_ERROR(error);
- }
- }
- } else {
- flags |= XFS_QMOPT_SBVERSION;
- sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
- XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
- }
-
- /*
- * Create the two inodes, if they don't exist already. The changes
- * made above will get added to a transaction and logged in one of
- * the qino_alloc calls below. If the device is readonly,
- * temporarily switch to read-write to do this.
- */
- if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
- if ((error = xfs_qm_qino_alloc(mp, &uip,
- sbflags | XFS_SB_UQUOTINO,
- flags | XFS_QMOPT_UQUOTA)))
- return XFS_ERROR(error);
-
- flags &= ~XFS_QMOPT_SBVERSION;
- }
- if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
- flags |= (XFS_IS_GQUOTA_ON(mp) ?
- XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
- error = xfs_qm_qino_alloc(mp, &gip,
- sbflags | XFS_SB_GQUOTINO, flags);
- if (error) {
- if (uip)
- VN_RELE(XFS_ITOV(uip));
-
- return XFS_ERROR(error);
- }
- }
-
- XFS_QI_UQIP(mp) = uip;
- XFS_QI_GQIP(mp) = gip;
-
- return 0;
-}
-
-
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- * XXXsup merge this with qm_reclaim_one().
- */
-STATIC int
-xfs_qm_shake_freelist(
- int howmany)
-{
- int nreclaimed;
- xfs_dqhash_t *hash;
- xfs_dquot_t *dqp, *nextdqp;
- int restarts;
- int nflushes;
-
- if (howmany <= 0)
- return 0;
-
- nreclaimed = 0;
- restarts = 0;
- nflushes = 0;
-
-#ifdef QUOTADEBUG
- cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
-#endif
- /* lock order is : hashchainlock, freelistlock, mplistlock */
- tryagain:
- xfs_qm_freelist_lock(xfs_Gqm);
-
- for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
- ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
- nreclaimed < howmany); ) {
- xfs_dqlock(dqp);
-
- /*
- * We are racing with dqlookup here. Naturally we don't
- * want to reclaim a dquot that lookup wants.
- */
- if (dqp->dq_flags & XFS_DQ_WANT) {
- xfs_dqunlock(dqp);
- xfs_qm_freelist_unlock(xfs_Gqm);
- if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
- return nreclaimed;
- XQM_STATS_INC(xqmstats.xs_qm_dqwants);
- goto tryagain;
- }
-
- /*
- * If the dquot is inactive, we are assured that it is
- * not on the mplist or the hashlist, and that makes our
- * life easier.
- */
- if (dqp->dq_flags & XFS_DQ_INACTIVE) {
- ASSERT(dqp->q_mount == NULL);
- ASSERT(! XFS_DQ_IS_DIRTY(dqp));
- ASSERT(dqp->HL_PREVP == NULL);
- ASSERT(dqp->MPL_PREVP == NULL);
- XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
- nextdqp = dqp->dq_flnext;
- goto off_freelist;
- }
-
- ASSERT(dqp->MPL_PREVP);
- /*
- * Try to grab the flush lock. If this dquot is in the process of
- * getting flushed to disk, we don't want to reclaim it.
- */
- if (! xfs_qm_dqflock_nowait(dqp)) {
- xfs_dqunlock(dqp);
- dqp = dqp->dq_flnext;
- continue;
- }
-
- /*
- * We have the flush lock so we know that this is not in the
- * process of being flushed. So, if this is dirty, flush it
- * DELWRI so that we don't get a freelist infested with
- * dirty dquots.
- */
- if (XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
- /*
- * We flush it delayed write, so don't bother
- * releasing the mplock.
- */
- (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
- xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
- dqp = dqp->dq_flnext;
- continue;
- }
- /*
- * We're trying to get the hashlock out of order. This races
- * with dqlookup; so, we giveup and goto the next dquot if
- * we couldn't get the hashlock. This way, we won't starve
- * a dqlookup process that holds the hashlock that is
- * waiting for the freelist lock.
- */
- if (! xfs_qm_dqhashlock_nowait(dqp)) {
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- dqp = dqp->dq_flnext;
- continue;
- }
- /*
- * This races with dquot allocation code as well as dqflush_all
- * and reclaim code. So, if we failed to grab the mplist lock,
- * giveup everything and start over.
- */
- hash = dqp->q_hash;
- ASSERT(hash);
- if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
- /* XXX put a sentinel so that we can come back here */
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- XFS_DQ_HASH_UNLOCK(hash);
- xfs_qm_freelist_unlock(xfs_Gqm);
- if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
- return nreclaimed;
- goto tryagain;
- }
- xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
-#ifdef QUOTADEBUG
- cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
- dqp, be32_to_cpu(dqp->q_core.d_id));
-#endif
- ASSERT(dqp->q_nrefs == 0);
- nextdqp = dqp->dq_flnext;
- XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
- XQM_HASHLIST_REMOVE(hash, dqp);
- xfs_dqfunlock(dqp);
- xfs_qm_mplist_unlock(dqp->q_mount);
- XFS_DQ_HASH_UNLOCK(hash);
-
- off_freelist:
- XQM_FREELIST_REMOVE(dqp);
- xfs_dqunlock(dqp);
- nreclaimed++;
- XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
- xfs_qm_dqdestroy(dqp);
- dqp = nextdqp;
- }
- xfs_qm_freelist_unlock(xfs_Gqm);
- return nreclaimed;
-}
-
-
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
-{
- int ndqused, nfree, n;
-
- if (!kmem_shake_allow(gfp_mask))
- return 0;
- if (!xfs_Gqm)
- return 0;
-
- nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
- /* incore dquots in all f/s's */
- ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
- ASSERT(ndqused >= 0);
-
- if (nfree <= ndqused && nfree < ndquot)
- return 0;
-
- ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
- n = nfree - ndqused - ndquot; /* # over target */
-
- return xfs_qm_shake_freelist(MAX(nfree, n));
-}
-
-
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
- xfs_dquot_t *dqpout;
- xfs_dquot_t *dqp;
- int restarts;
- int nflushes;
-
- restarts = 0;
- dqpout = NULL;
- nflushes = 0;
-
- /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
- startagain:
- xfs_qm_freelist_lock(xfs_Gqm);
-
- FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
- xfs_dqlock(dqp);
-
- /*
- * We are racing with dqlookup here. Naturally we don't
- * want to reclaim a dquot that lookup wants. We release the
- * freelist lock and start over, so that lookup will grab
- * both the dquot and the freelistlock.
- */
- if (dqp->dq_flags & XFS_DQ_WANT) {
- ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
- xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
- xfs_dqunlock(dqp);
- xfs_qm_freelist_unlock(xfs_Gqm);
- if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
- return NULL;
- XQM_STATS_INC(xqmstats.xs_qm_dqwants);
- goto startagain;
- }
-
- /*
- * If the dquot is inactive, we are assured that it is
- * not on the mplist or the hashlist, and that makes our
- * life easier.
- */
- if (dqp->dq_flags & XFS_DQ_INACTIVE) {
- ASSERT(dqp->q_mount == NULL);
- ASSERT(! XFS_DQ_IS_DIRTY(dqp));
- ASSERT(dqp->HL_PREVP == NULL);
- ASSERT(dqp->MPL_PREVP == NULL);
- XQM_FREELIST_REMOVE(dqp);
- xfs_dqunlock(dqp);
- dqpout = dqp;
- XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
- break;
- }
-
- ASSERT(dqp->q_hash);
- ASSERT(dqp->MPL_PREVP);
-
- /*
- * Try to grab the flush lock. If this dquot is in the process of
- * getting flushed to disk, we don't want to reclaim it.
- */
- if (! xfs_qm_dqflock_nowait(dqp)) {
- xfs_dqunlock(dqp);
- continue;
- }
-
- /*
- * We have the flush lock so we know that this is not in the
- * process of being flushed. So, if this is dirty, flush it
- * DELWRI so that we don't get a freelist infested with
- * dirty dquots.
- */
- if (XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
- /*
- * We flush it delayed write, so don't bother
- * releasing the freelist lock.
- */
- (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
- xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
- continue;
- }
-
- if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- continue;
- }
-
- if (! xfs_qm_dqhashlock_nowait(dqp))
- goto mplistunlock;
-
- ASSERT(dqp->q_nrefs == 0);
- xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
- XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
- XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
- XQM_FREELIST_REMOVE(dqp);
- dqpout = dqp;
- XFS_DQ_HASH_UNLOCK(dqp->q_hash);
- mplistunlock:
- xfs_qm_mplist_unlock(dqp->q_mount);
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- if (dqpout)
- break;
- }
-
- xfs_qm_freelist_unlock(xfs_Gqm);
- return dqpout;
-}
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
- xfs_dquot_t **O_dqpp)
-{
- xfs_dquot_t *dqp;
-
- /*
- * Check against high water mark to see if we want to pop
- * a nincompoop dquot off the freelist.
- */
- if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
- /*
- * Try to recycle a dquot from the freelist.
- */
- if ((dqp = xfs_qm_dqreclaim_one())) {
- XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
- /*
- * Just zero the core here. The rest will get
- * reinitialized by caller. XXX we shouldn't even
- * do this zero ...
- */
- memset(&dqp->q_core, 0, sizeof(dqp->q_core));
- *O_dqpp = dqp;
- return B_FALSE;
- }
- XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
- }
-
- /*
- * Allocate a brand new dquot on the kernel heap and return it
- * to the caller to initialize.
- */
- ASSERT(xfs_Gqm->qm_dqzone != NULL);
- *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
- atomic_inc(&xfs_Gqm->qm_totaldquots);
-
- return B_TRUE;
-}
-
-
-/*
- * Start a transaction and write the incore superblock changes to
- * disk. flags parameter indicates which fields have changed.
- */
-int
-xfs_qm_write_sb_changes(
- xfs_mount_t *mp,
- __int64_t flags)
-{
- xfs_trans_t *tp;
- int error;
-
-#ifdef QUOTADEBUG
- cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
-#endif
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
- if ((error = xfs_trans_reserve(tp, 0,
- mp->m_sb.sb_sectsize + 128, 0,
- 0,
- XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_mod_sb(tp, flags);
- (void) xfs_trans_commit(tp, 0, NULL);
-
- return 0;
-}
-
-
-/* --------------- utility functions for vnodeops ---------------- */
-
-
-/*
- * Given an inode, a uid and gid (from cred_t) make sure that we have
- * allocated relevant dquot(s) on disk, and that we won't exceed inode
- * quotas by creating this file.
- * This also attaches dquot(s) to the given inode after locking it,
- * and returns the dquots corresponding to the uid and/or gid.
- *
- * in : inode (unlocked)
- * out : udquot, gdquot with references taken and unlocked
- */
-int
-xfs_qm_vop_dqalloc(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- uid_t uid,
- gid_t gid,
- prid_t prid,
- uint flags,
- xfs_dquot_t **O_udqpp,
- xfs_dquot_t **O_gdqpp)
-{
- int error;
- xfs_dquot_t *uq, *gq;
- uint lockflags;
-
- if (!XFS_IS_QUOTA_ON(mp))
- return 0;
-
- lockflags = XFS_ILOCK_EXCL;
- xfs_ilock(ip, lockflags);
-
- if ((flags & XFS_QMOPT_INHERIT) &&
- XFS_INHERIT_GID(ip, XFS_MTOVFS(mp)))
- gid = ip->i_d.di_gid;
-
- /*
- * Attach the dquot(s) to this inode, doing a dquot allocation
- * if necessary. The dquot(s) will not be locked.
- */
- if (XFS_NOT_DQATTACHED(mp, ip)) {
- if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
- XFS_QMOPT_ILOCKED))) {
- xfs_iunlock(ip, lockflags);
- return error;
- }
- }
-
- uq = gq = NULL;
- if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
- if (ip->i_d.di_uid != uid) {
- /*
- * What we need is the dquot that has this uid, and
- * if we send the inode to dqget, the uid of the inode
- * takes priority over what's sent in the uid argument.
- * We must unlock inode here before calling dqget if
- * we're not sending the inode, because otherwise
- * we'll deadlock by doing trans_reserve while
- * holding ilock.
- */
- xfs_iunlock(ip, lockflags);
- if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
- XFS_DQ_USER,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
- &uq))) {
- ASSERT(error != ENOENT);
- return error;
- }
- /*
- * Get the ilock in the right order.
- */
- xfs_dqunlock(uq);
- lockflags = XFS_ILOCK_SHARED;
- xfs_ilock(ip, lockflags);
- } else {
- /*
- * Take an extra reference, because we'll return
- * this to caller
- */
- ASSERT(ip->i_udquot);
- uq = ip->i_udquot;
- xfs_dqlock(uq);
- XFS_DQHOLD(uq);
- xfs_dqunlock(uq);
- }
- }
- if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
- if (ip->i_d.di_gid != gid) {
- xfs_iunlock(ip, lockflags);
- if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
- XFS_DQ_GROUP,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
- &gq))) {
- if (uq)
- xfs_qm_dqrele(uq);
- ASSERT(error != ENOENT);
- return error;
- }
- xfs_dqunlock(gq);
- lockflags = XFS_ILOCK_SHARED;
- xfs_ilock(ip, lockflags);
- } else {
- ASSERT(ip->i_gdquot);
- gq = ip->i_gdquot;
- xfs_dqlock(gq);
- XFS_DQHOLD(gq);
- xfs_dqunlock(gq);
- }
- } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
- if (ip->i_d.di_projid != prid) {
- xfs_iunlock(ip, lockflags);
- if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
- XFS_DQ_PROJ,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
- &gq))) {
- if (uq)
- xfs_qm_dqrele(uq);
- ASSERT(error != ENOENT);
- return (error);
- }
- xfs_dqunlock(gq);
- lockflags = XFS_ILOCK_SHARED;
- xfs_ilock(ip, lockflags);
- } else {
- ASSERT(ip->i_gdquot);
- gq = ip->i_gdquot;
- xfs_dqlock(gq);
- XFS_DQHOLD(gq);
- xfs_dqunlock(gq);
- }
- }
- if (uq)
- xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
-
- xfs_iunlock(ip, lockflags);
- if (O_udqpp)
- *O_udqpp = uq;
- else if (uq)
- xfs_qm_dqrele(uq);
- if (O_gdqpp)
- *O_gdqpp = gq;
- else if (gq)
- xfs_qm_dqrele(gq);
- return 0;
-}
-
-/*
- * Actually transfer ownership, and do dquot modifications.
- * These were already reserved.
- */
-xfs_dquot_t *
-xfs_qm_vop_chown(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t **IO_olddq,
- xfs_dquot_t *newdq)
-{
- xfs_dquot_t *prevdq;
- uint bfield = XFS_IS_REALTIME_INODE(ip) ?
- XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
- ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-
- /* old dquot */
- prevdq = *IO_olddq;
- ASSERT(prevdq);
- ASSERT(prevdq != newdq);
-
- xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
- xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
-
- /* the sparkling new dquot */
- xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
- xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
-
- /*
- * Take an extra reference, because the inode
- * is going to keep this dquot pointer even
- * after the trans_commit.
- */
- xfs_dqlock(newdq);
- XFS_DQHOLD(newdq);
- xfs_dqunlock(newdq);
- *IO_olddq = newdq;
-
- return prevdq;
-}
-
-/*
- * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
- */
-int
-xfs_qm_vop_chown_reserve(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t *udqp,
- xfs_dquot_t *gdqp,
- uint flags)
-{
- int error;
- xfs_mount_t *mp;
- uint delblks, blkflags;
- xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
-
- ASSERT(XFS_ISLOCKED_INODE(ip));
- mp = ip->i_mount;
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- delblks = ip->i_delayed_blks;
- delblksudq = delblksgdq = unresudq = unresgdq = NULL;
- blkflags = XFS_IS_REALTIME_INODE(ip) ?
- XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
-
- if (XFS_IS_UQUOTA_ON(mp) && udqp &&
- ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
- delblksudq = udqp;
- /*
- * If there are delayed allocation blocks, then we have to
- * unreserve those from the old dquot, and add them to the
- * new dquot.
- */
- if (delblks) {
- ASSERT(ip->i_udquot);
- unresudq = ip->i_udquot;
- }
- }
- if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
- if ((XFS_IS_GQUOTA_ON(ip->i_mount) &&
- ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) ||
- (XFS_IS_PQUOTA_ON(ip->i_mount) &&
- ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))) {
- delblksgdq = gdqp;
- if (delblks) {
- ASSERT(ip->i_gdquot);
- unresgdq = ip->i_gdquot;
- }
- }
- }
-
- if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
- delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
- flags | blkflags)))
- return (error);
-
- /*
- * Do the delayed blks reservations/unreservations now. Since, these
- * are done without the help of a transaction, if a reservation fails
- * its previous reservations won't be automatically undone by trans
- * code. So, we have to do it manually here.
- */
- if (delblks) {
- /*
- * Do the reservations first. Unreservation can't fail.
- */
- ASSERT(delblksudq || delblksgdq);
- ASSERT(unresudq || unresgdq);
- if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
- delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
- flags | blkflags)))
- return (error);
- xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
- unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
- blkflags);
- }
-
- return (0);
-}
-
-int
-xfs_qm_vop_rename_dqattach(
- xfs_inode_t **i_tab)
-{
- xfs_inode_t *ip;
- int i;
- int error;
-
- ip = i_tab[0];
-
- if (! XFS_IS_QUOTA_ON(ip->i_mount))
- return 0;
-
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
- }
- for (i = 1; (i < 4 && i_tab[i]); i++) {
- /*
- * Watch out for duplicate entries in the table.
- */
- if ((ip = i_tab[i]) != i_tab[i-1]) {
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
- }
- }
- }
- return 0;
-}
-
-void
-xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t *udqp,
- xfs_dquot_t *gdqp)
-{
- if (!XFS_IS_QUOTA_ON(tp->t_mountp))
- return;
-
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
- ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
-
- if (udqp) {
- xfs_dqlock(udqp);
- XFS_DQHOLD(udqp);
- xfs_dqunlock(udqp);
- ASSERT(ip->i_udquot == NULL);
- ip->i_udquot = udqp;
- ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
- ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
- xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
- }
- if (gdqp) {
- xfs_dqlock(gdqp);
- XFS_DQHOLD(gdqp);
- xfs_dqunlock(gdqp);
- ASSERT(ip->i_gdquot == NULL);
- ip->i_gdquot = gdqp;
- ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
- ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
- ip->i_d.di_gid : ip->i_d.di_projid) ==
- be32_to_cpu(gdqp->q_core.d_id));
- xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
- }
-}
-
-/* ------------- list stuff -----------------*/
-STATIC void
-xfs_qm_freelist_init(xfs_frlist_t *ql)
-{
- ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
- mutex_init(&ql->qh_lock);
- ql->qh_version = 0;
- ql->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_freelist_destroy(xfs_frlist_t *ql)
-{
- xfs_dquot_t *dqp, *nextdqp;
-
- mutex_lock(&ql->qh_lock);
- for (dqp = ql->qh_next;
- dqp != (xfs_dquot_t *)ql; ) {
- xfs_dqlock(dqp);
- nextdqp = dqp->dq_flnext;
-#ifdef QUOTADEBUG
- cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
-#endif
- XQM_FREELIST_REMOVE(dqp);
- xfs_dqunlock(dqp);
- xfs_qm_dqdestroy(dqp);
- dqp = nextdqp;
- }
- /*
- * Don't bother about unlocking.
- */
- mutex_destroy(&ql->qh_lock);
-
- ASSERT(ql->qh_nelems == 0);
-}
-
-STATIC void
-xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
-{
- dq->dq_flnext = ql->qh_next;
- dq->dq_flprev = (xfs_dquot_t *)ql;
- ql->qh_next = dq;
- dq->dq_flnext->dq_flprev = dq;
- xfs_Gqm->qm_dqfreelist.qh_nelems++;
- xfs_Gqm->qm_dqfreelist.qh_version++;
-}
-
-void
-xfs_qm_freelist_unlink(xfs_dquot_t *dq)
-{
- xfs_dquot_t *next = dq->dq_flnext;
- xfs_dquot_t *prev = dq->dq_flprev;
-
- next->dq_flprev = prev;
- prev->dq_flnext = next;
- dq->dq_flnext = dq->dq_flprev = dq;
- xfs_Gqm->qm_dqfreelist.qh_nelems--;
- xfs_Gqm->qm_dqfreelist.qh_version++;
-}
-
-void
-xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
-{
- xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
-}
-
-STATIC int
-xfs_qm_dqhashlock_nowait(
- xfs_dquot_t *dqp)
-{
- int locked;
-
- locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
- return locked;
-}
-
-int
-xfs_qm_freelist_lock_nowait(
- xfs_qm_t *xqm)
-{
- int locked;
-
- locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
- return locked;
-}
-
-STATIC int
-xfs_qm_mplist_nowait(
- xfs_mount_t *mp)
-{
- int locked;
-
- ASSERT(mp->m_quotainfo);
- locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
- return locked;
-}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
deleted file mode 100644
index 4568deb6da8..00000000000
--- a/fs/xfs/quota/xfs_qm.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_QM_H__
-#define __XFS_QM_H__
-
-#include "xfs_dquot_item.h"
-#include "xfs_dquot.h"
-#include "xfs_quota_priv.h"
-#include "xfs_qm_stats.h"
-
-struct xfs_qm;
-struct xfs_inode;
-
-extern uint ndquot;
-extern mutex_t xfs_Gqm_lock;
-extern struct xfs_qm *xfs_Gqm;
-extern kmem_zone_t *qm_dqzone;
-extern kmem_zone_t *qm_dqtrxzone;
-
-/*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS 7
-
-/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS 4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO 2
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW (NBPP / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH ((NBPP * 4) / sizeof(xfs_dqhash_t))
-
-/*
- * We output a cmn_err when quotachecking a quota file with more than
- * this many fsbs.
- */
-#define XFS_QM_BIG_QCHECK_NBLKS 500
-
-/*
- * This defines the unit of allocation of dquots.
- * Currently, it is just one file system block, and a 4K blk contains 30
- * (136 * 30 = 4080) dquots. It's probably not worth trying to make
- * this more dynamic.
- * XXXsup However, if this number is changed, we have to make sure that we don't
- * implicitly assume that we do allocations in chunks of a single filesystem
- * block in the dquot/xqm code.
- */
-#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
-/*
- * When doing a quotacheck, we log dquot clusters of this many FSBs at most
- * in a single transaction. We don't want to ask for too huge a log reservation.
- */
-#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3
-
-typedef xfs_dqhash_t xfs_dqlist_t;
-/*
- * The freelist head. The first two fields match the first two in the
- * xfs_dquot_t structure (in xfs_dqmarker_t)
- */
-typedef struct xfs_frlist {
- struct xfs_dquot *qh_next;
- struct xfs_dquot *qh_prev;
- mutex_t qh_lock;
- uint qh_version;
- uint qh_nelems;
-} xfs_frlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
- xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
- xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
- uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
- xfs_frlist_t qm_dqfreelist; /* freelist of dquots */
- atomic_t qm_totaldquots; /* total incore dquots */
- uint qm_nrefs; /* file systems with quota on */
- int qm_dqfree_ratio;/* ratio of free to inuse dquots */
- kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
- kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
- * Various quota information for individual filesystems.
- * The mount structure keeps a pointer to this.
- */
-typedef struct xfs_quotainfo {
- xfs_inode_t *qi_uquotaip; /* user quota inode */
- xfs_inode_t *qi_gquotaip; /* group quota inode */
- lock_t qi_pinlock; /* dquot pinning mutex */
- xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
- int qi_dqreclaims; /* a change here indicates
- a removal in the dqlist */
- time_t qi_btimelimit; /* limit for blks timer */
- time_t qi_itimelimit; /* limit for inodes timer */
- time_t qi_rtbtimelimit;/* limit for rt blks timer */
- xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */
- xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */
- xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */
- mutex_t qi_quotaofflock;/* to serialize quotaoff */
- xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
- uint qi_dqperchunk; /* # ondisk dqs in above chunk */
- xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */
- xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */
- xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */
- xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
- xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
- xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
-} xfs_quotainfo_t;
-
-
-extern xfs_dqtrxops_t xfs_trans_dquot_ops;
-
-extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
-extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
- xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
-extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
-extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
-
-/*
- * We keep the usr and grp dquots separately so that locking will be easier
- * to do at commit time. All transactions that we know of at this point
- * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
- */
-#define XFS_QM_TRANS_MAXDQS 2
-typedef struct xfs_dquot_acct {
- xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
- xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
-} xfs_dquot_acct_t;
-
-/*
- * Users are allowed to have a usage exceeding their softlimit for
- * a period this long.
- */
-#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */
-#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */
-#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */
-
-#define XFS_QM_BWARNLIMIT 5
-#define XFS_QM_IWARNLIMIT 5
-#define XFS_QM_RTBWARNLIMIT 5
-
-#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock))
-#define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock))
-#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++)
-#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
-
-extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern int xfs_qm_mount_quotas(xfs_mount_t *, int);
-extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint);
-extern int xfs_qm_quotacheck(xfs_mount_t *);
-extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
-extern int xfs_qm_unmount_quotas(xfs_mount_t *);
-extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-extern int xfs_qm_sync(xfs_mount_t *, short);
-
-/* dquot stuff */
-extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int xfs_qm_dqattach(xfs_inode_t *, uint);
-extern void xfs_qm_dqdetach(xfs_inode_t *);
-extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
-extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
-
-/* vop stuff */
-extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
- uid_t, gid_t, prid_t, uint,
- xfs_dquot_t **, xfs_dquot_t **);
-extern void xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *);
-extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **);
-extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t **, xfs_dquot_t *);
-extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *, uint);
-
-/* list stuff */
-extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
-extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
-extern int xfs_qm_freelist_lock_nowait(xfs_qm_t *);
-
-/* system call interface */
-extern int xfs_qm_quotactl(bhv_desc_t *, int, int, xfs_caddr_t);
-
-#ifdef DEBUG
-extern int xfs_qm_internalqcheck(xfs_mount_t *);
-#else
-#define xfs_qm_internalqcheck(mp) (0)
-#endif
-
-#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
deleted file mode 100644
index 90402a1c398..00000000000
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_clnt.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-#define MNTOPT_QUOTA "quota" /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota" /* no quotas */
-#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */
-#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */
-#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */
-#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
-
-STATIC int
-xfs_qm_parseargs(
- struct bhv_desc *bhv,
- char *options,
- struct xfs_mount_args *args,
- int update)
-{
- size_t length;
- char *local_options = options;
- char *this_char;
- int error;
- int referenced = update;
-
- while ((this_char = strsep(&local_options, ",")) != NULL) {
- length = strlen(this_char);
- if (local_options)
- length++;
-
- if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
- args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
- args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
- referenced = update;
- } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
- !strcmp(this_char, MNTOPT_UQUOTA) ||
- !strcmp(this_char, MNTOPT_USRQUOTA)) {
- args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
- referenced = 1;
- } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
- !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
- args->flags |= XFSMNT_UQUOTA;
- args->flags &= ~XFSMNT_UQUOTAENF;
- referenced = 1;
- } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
- !strcmp(this_char, MNTOPT_PRJQUOTA)) {
- args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
- referenced = 1;
- } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
- args->flags |= XFSMNT_PQUOTA;
- args->flags &= ~XFSMNT_PQUOTAENF;
- referenced = 1;
- } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
- !strcmp(this_char, MNTOPT_GRPQUOTA)) {
- args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
- referenced = 1;
- } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
- args->flags |= XFSMNT_GQUOTA;
- args->flags &= ~XFSMNT_GQUOTAENF;
- referenced = 1;
- } else {
- if (local_options)
- *(local_options-1) = ',';
- continue;
- }
-
- while (length--)
- *this_char++ = ',';
- }
-
- if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
- cmn_err(CE_WARN,
- "XFS: cannot mount with both project and group quota");
- return XFS_ERROR(EINVAL);
- }
-
- PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error);
- if (!error && !referenced)
- bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM);
- return error;
-}
-
-STATIC int
-xfs_qm_showargs(
- struct bhv_desc *bhv,
- struct seq_file *m)
-{
- struct vfs *vfsp = bhvtovfs(bhv);
- struct xfs_mount *mp = XFS_VFSTOM(vfsp);
- int error;
-
- if (mp->m_qflags & XFS_UQUOTA_ACCT) {
- (mp->m_qflags & XFS_UQUOTA_ENFD) ?
- seq_puts(m, "," MNTOPT_USRQUOTA) :
- seq_puts(m, "," MNTOPT_UQUOTANOENF);
- }
-
- if (mp->m_qflags & XFS_PQUOTA_ACCT) {
- (mp->m_qflags & XFS_OQUOTA_ENFD) ?
- seq_puts(m, "," MNTOPT_PRJQUOTA) :
- seq_puts(m, "," MNTOPT_PQUOTANOENF);
- }
-
- if (mp->m_qflags & XFS_GQUOTA_ACCT) {
- (mp->m_qflags & XFS_OQUOTA_ENFD) ?
- seq_puts(m, "," MNTOPT_GRPQUOTA) :
- seq_puts(m, "," MNTOPT_GQUOTANOENF);
- }
-
- if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
- seq_puts(m, "," MNTOPT_NOQUOTA);
-
- PVFS_SHOWARGS(BHV_NEXT(bhv), m, error);
- return error;
-}
-
-STATIC int
-xfs_qm_mount(
- struct bhv_desc *bhv,
- struct xfs_mount_args *args,
- struct cred *cr)
-{
- struct vfs *vfsp = bhvtovfs(bhv);
- struct xfs_mount *mp = XFS_VFSTOM(vfsp);
- int error;
-
- if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA))
- xfs_qm_mount_quotainit(mp, args->flags);
- PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error);
- return error;
-}
-
-STATIC int
-xfs_qm_syncall(
- struct bhv_desc *bhv,
- int flags,
- cred_t *credp)
-{
- struct vfs *vfsp = bhvtovfs(bhv);
- struct xfs_mount *mp = XFS_VFSTOM(vfsp);
- int error;
-
- /*
- * Get the Quota Manager to flush the dquots.
- */
- if (XFS_IS_QUOTA_ON(mp)) {
- if ((error = xfs_qm_sync(mp, flags))) {
- /*
- * If we got an IO error, we will be shutting down.
- * So, there's nothing more for us to do here.
- */
- ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
- if (XFS_FORCED_SHUTDOWN(mp)) {
- return XFS_ERROR(error);
- }
- }
- }
- PVFS_SYNC(BHV_NEXT(bhv), flags, credp, error);
- return error;
-}
-
-STATIC int
-xfs_qm_newmount(
- xfs_mount_t *mp,
- uint *needquotamount,
- uint *quotaflags)
-{
- uint quotaondisk;
- uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
-
- *quotaflags = 0;
- *needquotamount = B_FALSE;
-
- quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
- (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
-
- if (quotaondisk) {
- uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
- pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
- gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
- }
-
- /*
- * If the device itself is read-only, we can't allow
- * the user to change the state of quota on the mount -
- * this would generate a transaction on the ro device,
- * which would lead to an I/O error and shutdown
- */
-
- if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
- (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) ||
- (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
- (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) ||
- (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
- (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) &&
- xfs_dev_is_read_only(mp, "changing quota state")) {
- cmn_err(CE_WARN,
- "XFS: please mount with%s%s%s%s.",
- (!quotaondisk ? "out quota" : ""),
- (uquotaondisk ? " usrquota" : ""),
- (pquotaondisk ? " prjquota" : ""),
- (gquotaondisk ? " grpquota" : ""));
- return XFS_ERROR(EPERM);
- }
-
- if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
- /*
- * Call mount_quotas at this point only if we won't have to do
- * a quotacheck.
- */
- if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
- /*
- * If an error occured, qm_mount_quotas code
- * has already disabled quotas. So, just finish
- * mounting, and get on with the boring life
- * without disk quotas.
- */
- xfs_qm_mount_quotas(mp, 0);
- } else {
- /*
- * Clear the quota flags, but remember them. This
- * is so that the quota code doesn't get invoked
- * before we're ready. This can happen when an
- * inode goes inactive and wants to free blocks,
- * or via xfs_log_mount_finish.
- */
- *needquotamount = B_TRUE;
- *quotaflags = mp->m_qflags;
- mp->m_qflags = 0;
- }
- }
-
- return 0;
-}
-
-STATIC int
-xfs_qm_endmount(
- xfs_mount_t *mp,
- uint needquotamount,
- uint quotaflags,
- int mfsi_flags)
-{
- if (needquotamount) {
- ASSERT(mp->m_qflags == 0);
- mp->m_qflags = quotaflags;
- xfs_qm_mount_quotas(mp, mfsi_flags);
- }
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
- if (! (XFS_IS_QUOTA_ON(mp)))
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
- else
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-#endif
-
-#ifdef QUOTADEBUG
- if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
- cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
-#endif
-
- return 0;
-}
-
-STATIC void
-xfs_qm_dqrele_null(
- xfs_dquot_t *dq)
-{
- /*
- * Called from XFS, where we always check first for a NULL dquot.
- */
- if (!dq)
- return;
- xfs_qm_dqrele(dq);
-}
-
-
-STATIC struct xfs_qmops xfs_qmcore_xfs = {
- .xfs_qminit = xfs_qm_newmount,
- .xfs_qmdone = xfs_qm_unmount_quotadestroy,
- .xfs_qmmount = xfs_qm_endmount,
- .xfs_qmunmount = xfs_qm_unmount_quotas,
- .xfs_dqrele = xfs_qm_dqrele_null,
- .xfs_dqattach = xfs_qm_dqattach,
- .xfs_dqdetach = xfs_qm_dqdetach,
- .xfs_dqpurgeall = xfs_qm_dqpurge_all,
- .xfs_dqvopalloc = xfs_qm_vop_dqalloc,
- .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode,
- .xfs_dqvoprename = xfs_qm_vop_rename_dqattach,
- .xfs_dqvopchown = xfs_qm_vop_chown,
- .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve,
- .xfs_dqtrxops = &xfs_trans_dquot_ops,
-};
-
-struct bhv_vfsops xfs_qmops = { {
- BHV_IDENTITY_INIT(VFS_BHV_QM, VFS_POSITION_QM),
- .vfs_parseargs = xfs_qm_parseargs,
- .vfs_showargs = xfs_qm_showargs,
- .vfs_mount = xfs_qm_mount,
- .vfs_sync = xfs_qm_syncall,
- .vfs_quotactl = xfs_qm_quotactl, },
-};
-
-
-void __init
-xfs_qm_init(void)
-{
- static char message[] __initdata =
- KERN_INFO "SGI XFS Quota Management subsystem\n";
-
- printk(message);
- mutex_init(&xfs_Gqm_lock);
- vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs);
- xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
- vfs_bhv_clr_custom(&xfs_qmops);
- xfs_qm_cleanup_procfs();
- if (qm_dqzone)
- kmem_cache_destroy(qm_dqzone);
- if (qm_dqtrxzone)
- kmem_cache_destroy(qm_dqtrxzone);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
deleted file mode 100644
index 0570f773355..00000000000
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-struct xqmstats xqmstats;
-
-STATIC int
-xfs_qm_read_xfsquota(
- char *buffer,
- char **start,
- off_t offset,
- int count,
- int *eof,
- void *data)
-{
- int len;
-
- /* maximum; incore; ratio free to inuse; freelist */
- len = sprintf(buffer, "%d\t%d\t%d\t%u\n",
- ndquot,
- xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
- xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
- xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
-
- if (offset >= len) {
- *start = buffer;
- *eof = 1;
- return 0;
- }
- *start = buffer + offset;
- if ((len -= offset) > count)
- return count;
- *eof = 1;
-
- return len;
-}
-
-STATIC int
-xfs_qm_read_stats(
- char *buffer,
- char **start,
- off_t offset,
- int count,
- int *eof,
- void *data)
-{
- int len;
-
- /* quota performance statistics */
- len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n",
- xqmstats.xs_qm_dqreclaims,
- xqmstats.xs_qm_dqreclaim_misses,
- xqmstats.xs_qm_dquot_dups,
- xqmstats.xs_qm_dqcachemisses,
- xqmstats.xs_qm_dqcachehits,
- xqmstats.xs_qm_dqwants,
- xqmstats.xs_qm_dqshake_reclaims,
- xqmstats.xs_qm_dqinact_reclaims);
-
- if (offset >= len) {
- *start = buffer;
- *eof = 1;
- return 0;
- }
- *start = buffer + offset;
- if ((len -= offset) > count)
- return count;
- *eof = 1;
-
- return len;
-}
-
-void
-xfs_qm_init_procfs(void)
-{
- create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL);
- create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL);
-}
-
-void
-xfs_qm_cleanup_procfs(void)
-{
- remove_proc_entry("fs/xfs/xqm", NULL);
- remove_proc_entry("fs/xfs/xqmstat", NULL);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
deleted file mode 100644
index a50ffabcf55..00000000000
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_QM_STATS_H__
-#define __XFS_QM_STATS_H__
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-/*
- * XQM global statistics
- */
-struct xqmstats {
- __uint32_t xs_qm_dqreclaims;
- __uint32_t xs_qm_dqreclaim_misses;
- __uint32_t xs_qm_dquot_dups;
- __uint32_t xs_qm_dqcachemisses;
- __uint32_t xs_qm_dqcachehits;
- __uint32_t xs_qm_dqwants;
- __uint32_t xs_qm_dqshake_reclaims;
- __uint32_t xs_qm_dqinact_reclaims;
-};
-
-extern struct xqmstats xqmstats;
-
-# define XQM_STATS_INC(count) ( (count)++ )
-
-extern void xfs_qm_init_procfs(void);
-extern void xfs_qm_cleanup_procfs(void);
-
-#else
-
-# define XQM_STATS_INC(count) do { } while (0)
-
-static __inline void xfs_qm_init_procfs(void) { };
-static __inline void xfs_qm_cleanup_procfs(void) { };
-
-#endif
-
-#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
deleted file mode 100644
index 676884394aa..00000000000
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ /dev/null
@@ -1,1483 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <linux/capability.h>
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-
-#ifdef DEBUG
-# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
-#else
-# define qdprintk(s, args...) do { } while (0)
-#endif
-
-STATIC int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-STATIC int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
- fs_disk_quota_t *);
-STATIC int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-STATIC int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
- fs_disk_quota_t *);
-STATIC int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-STATIC int xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t);
-STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
- uint);
-STATIC uint xfs_qm_import_flags(uint);
-STATIC uint xfs_qm_export_flags(uint);
-STATIC uint xfs_qm_import_qtype_flags(uint);
-STATIC uint xfs_qm_export_qtype_flags(uint);
-STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
- fs_disk_quota_t *);
-
-
-/*
- * The main distribution switch of all XFS quotactl system calls.
- */
-int
-xfs_qm_quotactl(
- struct bhv_desc *bdp,
- int cmd,
- int id,
- xfs_caddr_t addr)
-{
- xfs_mount_t *mp;
- int error;
- struct vfs *vfsp;
-
- vfsp = bhvtovfs(bdp);
- mp = XFS_VFSTOM(vfsp);
-
- ASSERT(addr != NULL || cmd == Q_XQUOTASYNC);
-
- /*
- * The following commands are valid even when quotaoff.
- */
- switch (cmd) {
- case Q_XQUOTARM:
- /*
- * Truncate quota files. quota must be off.
- */
- if (XFS_IS_QUOTA_ON(mp))
- return XFS_ERROR(EINVAL);
- if (vfsp->vfs_flag & VFS_RDONLY)
- return XFS_ERROR(EROFS);
- return (xfs_qm_scall_trunc_qfiles(mp,
- xfs_qm_import_qtype_flags(*(uint *)addr)));
-
- case Q_XGETQSTAT:
- /*
- * Get quota status information.
- */
- return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr));
-
- case Q_XQUOTAON:
- /*
- * QUOTAON - enabling quota enforcement.
- * Quota accounting must be turned on at mount time.
- */
- if (vfsp->vfs_flag & VFS_RDONLY)
- return XFS_ERROR(EROFS);
- return (xfs_qm_scall_quotaon(mp,
- xfs_qm_import_flags(*(uint *)addr)));
-
- case Q_XQUOTAOFF:
- if (vfsp->vfs_flag & VFS_RDONLY)
- return XFS_ERROR(EROFS);
- break;
-
- case Q_XQUOTASYNC:
- return (xfs_sync_inodes(mp, SYNC_DELWRI, 0, NULL));
-
- default:
- break;
- }
-
- if (! XFS_IS_QUOTA_ON(mp))
- return XFS_ERROR(ESRCH);
-
- switch (cmd) {
- case Q_XQUOTAOFF:
- if (vfsp->vfs_flag & VFS_RDONLY)
- return XFS_ERROR(EROFS);
- error = xfs_qm_scall_quotaoff(mp,
- xfs_qm_import_flags(*(uint *)addr),
- B_FALSE);
- break;
-
- case Q_XGETQUOTA:
- error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER,
- (fs_disk_quota_t *)addr);
- break;
- case Q_XGETGQUOTA:
- error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
- (fs_disk_quota_t *)addr);
- break;
- case Q_XGETPQUOTA:
- error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
- (fs_disk_quota_t *)addr);
- break;
-
- case Q_XSETQLIM:
- if (vfsp->vfs_flag & VFS_RDONLY)
- return XFS_ERROR(EROFS);
- error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER,
- (fs_disk_quota_t *)addr);
- break;
- case Q_XSETGQLIM:
- if (vfsp->vfs_flag & VFS_RDONLY)
- return XFS_ERROR(EROFS);
- error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
- (fs_disk_quota_t *)addr);
- break;
- case Q_XSETPQLIM:
- if (vfsp->vfs_flag & VFS_RDONLY)
- return XFS_ERROR(EROFS);
- error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
- (fs_disk_quota_t *)addr);
- break;
-
- default:
- error = XFS_ERROR(EINVAL);
- break;
- }
-
- return (error);
-}
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice, without purging the cache.
- */
-STATIC int
-xfs_qm_scall_quotaoff(
- xfs_mount_t *mp,
- uint flags,
- boolean_t force)
-{
- uint dqtype;
- unsigned long s;
- int error;
- uint inactivate_flags;
- xfs_qoff_logitem_t *qoffstart;
- int nculprits;
-
- if (!force && !capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
- /*
- * No file system can have quotas enabled on disk but not in core.
- * Note that quota utilities (like quotaoff) _expect_
- * errno == EEXIST here.
- */
- if ((mp->m_qflags & flags) == 0)
- return XFS_ERROR(EEXIST);
- error = 0;
-
- flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
- /*
- * We don't want to deal with two quotaoffs messing up each other,
- * so we're going to serialize it. quotaoff isn't exactly a performance
- * critical thing.
- * If quotaoff, then we must be dealing with the root filesystem.
- */
- ASSERT(mp->m_quotainfo);
- if (mp->m_quotainfo)
- mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
-
- ASSERT(mp->m_quotainfo);
-
- /*
- * If we're just turning off quota enforcement, change mp and go.
- */
- if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
- mp->m_qflags &= ~(flags);
-
- s = XFS_SB_LOCK(mp);
- mp->m_sb.sb_qflags = mp->m_qflags;
- XFS_SB_UNLOCK(mp, s);
- mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
- /* XXX what to do if error ? Revert back to old vals incore ? */
- error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
- return (error);
- }
-
- dqtype = 0;
- inactivate_flags = 0;
- /*
- * If accounting is off, we must turn enforcement off, clear the
- * quota 'CHKD' certificate to make it known that we have to
- * do a quotacheck the next time this quota is turned on.
- */
- if (flags & XFS_UQUOTA_ACCT) {
- dqtype |= XFS_QMOPT_UQUOTA;
- flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
- inactivate_flags |= XFS_UQUOTA_ACTIVE;
- }
- if (flags & XFS_GQUOTA_ACCT) {
- dqtype |= XFS_QMOPT_GQUOTA;
- flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
- inactivate_flags |= XFS_GQUOTA_ACTIVE;
- } else if (flags & XFS_PQUOTA_ACCT) {
- dqtype |= XFS_QMOPT_PQUOTA;
- flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
- inactivate_flags |= XFS_PQUOTA_ACTIVE;
- }
-
- /*
- * Nothing to do? Don't complain. This happens when we're just
- * turning off quota enforcement.
- */
- if ((mp->m_qflags & flags) == 0) {
- mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
- return (0);
- }
-
- /*
- * Write the LI_QUOTAOFF log record, and do SB changes atomically,
- * and synchronously.
- */
- xfs_qm_log_quotaoff(mp, &qoffstart, flags);
-
- /*
- * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
- * to take care of the race between dqget and quotaoff. We don't take
- * any special locks to reset these bits. All processes need to check
- * these bits *after* taking inode lock(s) to see if the particular
- * quota type is in the process of being turned off. If *ACTIVE, it is
- * guaranteed that all dquot structures and all quotainode ptrs will all
- * stay valid as long as that inode is kept locked.
- *
- * There is no turning back after this.
- */
- mp->m_qflags &= ~inactivate_flags;
-
- /*
- * Give back all the dquot reference(s) held by inodes.
- * Here we go thru every single incore inode in this file system, and
- * do a dqrele on the i_udquot/i_gdquot that it may have.
- * Essentially, as long as somebody has an inode locked, this guarantees
- * that quotas will not be turned off. This is handy because in a
- * transaction once we lock the inode(s) and check for quotaon, we can
- * depend on the quota inodes (and other things) being valid as long as
- * we keep the lock(s).
- */
- xfs_qm_dqrele_all_inodes(mp, flags);
-
- /*
- * Next we make the changes in the quota flag in the mount struct.
- * This isn't protected by a particular lock directly, because we
- * don't want to take a mrlock everytime we depend on quotas being on.
- */
- mp->m_qflags &= ~(flags);
-
- /*
- * Go through all the dquots of this file system and purge them,
- * according to what was turned off. We may not be able to get rid
- * of all dquots, because dquots can have temporary references that
- * are not attached to inodes. eg. xfs_setattr, xfs_create.
- * So, if we couldn't purge all the dquots from the filesystem,
- * we can't get rid of the incore data structures.
- */
- while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF)))
- delay(10 * nculprits);
-
- /*
- * Transactions that had started before ACTIVE state bit was cleared
- * could have logged many dquots, so they'd have higher LSNs than
- * the first QUOTAOFF log record does. If we happen to crash when
- * the tail of the log has gone past the QUOTAOFF record, but
- * before the last dquot modification, those dquots __will__
- * recover, and that's not good.
- *
- * So, we have QUOTAOFF start and end logitems; the start
- * logitem won't get overwritten until the end logitem appears...
- */
- xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
-
- /*
- * If quotas is completely disabled, close shop.
- */
- if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
- ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
- mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
- xfs_qm_destroy_quotainfo(mp);
- return (0);
- }
-
- /*
- * Release our quotainode references, and vn_purge them,
- * if we don't need them anymore.
- */
- if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
- XFS_PURGE_INODE(XFS_QI_UQIP(mp));
- XFS_QI_UQIP(mp) = NULL;
- }
- if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
- XFS_PURGE_INODE(XFS_QI_GQIP(mp));
- XFS_QI_GQIP(mp) = NULL;
- }
- mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
- return (error);
-}
-
-STATIC int
-xfs_qm_scall_trunc_qfiles(
- xfs_mount_t *mp,
- uint flags)
-{
- int error;
- xfs_inode_t *qip;
-
- if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
- error = 0;
- if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
- qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
- return XFS_ERROR(EINVAL);
- }
-
- if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
- error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
- if (! error) {
- (void) xfs_truncate_file(mp, qip);
- VN_RELE(XFS_ITOV(qip));
- }
- }
-
- if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
- mp->m_sb.sb_gquotino != NULLFSINO) {
- error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
- if (! error) {
- (void) xfs_truncate_file(mp, qip);
- VN_RELE(XFS_ITOV(qip));
- }
- }
-
- return (error);
-}
-
-
-/*
- * Switch on (a given) quota enforcement for a filesystem. This takes
- * effect immediately.
- * (Switching on quota accounting must be done at mount time.)
- */
-STATIC int
-xfs_qm_scall_quotaon(
- xfs_mount_t *mp,
- uint flags)
-{
- int error;
- unsigned long s;
- uint qf;
- uint accflags;
- __int64_t sbflags;
-
- if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
-
- flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
- /*
- * Switching on quota accounting must be done at mount time.
- */
- accflags = flags & XFS_ALL_QUOTA_ACCT;
- flags &= ~(XFS_ALL_QUOTA_ACCT);
-
- sbflags = 0;
-
- if (flags == 0) {
- qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags);
- return XFS_ERROR(EINVAL);
- }
-
- /* No fs can turn on quotas with a delayed effect */
- ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
-
- /*
- * Can't enforce without accounting. We check the superblock
- * qflags here instead of m_qflags because rootfs can have
- * quota acct on ondisk without m_qflags' knowing.
- */
- if (((flags & XFS_UQUOTA_ACCT) == 0 &&
- (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
- (flags & XFS_UQUOTA_ENFD))
- ||
- ((flags & XFS_PQUOTA_ACCT) == 0 &&
- (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
- (flags & XFS_OQUOTA_ENFD))
- ||
- ((flags & XFS_GQUOTA_ACCT) == 0 &&
- (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
- (flags & XFS_OQUOTA_ENFD))) {
- qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
- flags, mp->m_sb.sb_qflags);
- return XFS_ERROR(EINVAL);
- }
- /*
- * If everything's upto-date incore, then don't waste time.
- */
- if ((mp->m_qflags & flags) == flags)
- return XFS_ERROR(EEXIST);
-
- /*
- * Change sb_qflags on disk but not incore mp->qflags
- * if this is the root filesystem.
- */
- s = XFS_SB_LOCK(mp);
- qf = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = qf | flags;
- XFS_SB_UNLOCK(mp, s);
-
- /*
- * There's nothing to change if it's the same.
- */
- if ((qf & flags) == flags && sbflags == 0)
- return XFS_ERROR(EEXIST);
- sbflags |= XFS_SB_QFLAGS;
-
- if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
- return (error);
- /*
- * If we aren't trying to switch on quota enforcement, we are done.
- */
- if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
- (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
- ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
- (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
- ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
- (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
- (flags & XFS_ALL_QUOTA_ENFD) == 0)
- return (0);
-
- if (! XFS_IS_QUOTA_RUNNING(mp))
- return XFS_ERROR(ESRCH);
-
- /*
- * Switch on quota enforcement in core.
- */
- mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
- mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
- mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
- return (0);
-}
-
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- */
-STATIC int
-xfs_qm_scall_getqstat(
- xfs_mount_t *mp,
- fs_quota_stat_t *out)
-{
- xfs_inode_t *uip, *gip;
- boolean_t tempuqip, tempgqip;
-
- uip = gip = NULL;
- tempuqip = tempgqip = B_FALSE;
- memset(out, 0, sizeof(fs_quota_stat_t));
-
- out->qs_version = FS_QSTAT_VERSION;
- if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
- out->qs_uquota.qfs_ino = NULLFSINO;
- out->qs_gquota.qfs_ino = NULLFSINO;
- return (0);
- }
- out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
- (XFS_ALL_QUOTA_ACCT|
- XFS_ALL_QUOTA_ENFD));
- out->qs_pad = 0;
- out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
- out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-
- if (mp->m_quotainfo) {
- uip = mp->m_quotainfo->qi_uquotaip;
- gip = mp->m_quotainfo->qi_gquotaip;
- }
- if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0) == 0)
- tempuqip = B_TRUE;
- }
- if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0) == 0)
- tempgqip = B_TRUE;
- }
- if (uip) {
- out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
- out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
- if (tempuqip)
- VN_RELE(XFS_ITOV(uip));
- }
- if (gip) {
- out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
- out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
- if (tempgqip)
- VN_RELE(XFS_ITOV(gip));
- }
- if (mp->m_quotainfo) {
- out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
- out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp);
- out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp);
- out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp);
- out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp);
- out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp);
- }
- return (0);
-}
-
-/*
- * Adjust quota limits, and start/stop timers accordingly.
- */
-STATIC int
-xfs_qm_scall_setqlim(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- fs_disk_quota_t *newlim)
-{
- xfs_disk_dquot_t *ddq;
- xfs_dquot_t *dqp;
- xfs_trans_t *tp;
- int error;
- xfs_qcnt_t hard, soft;
-
- if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
-
- if ((newlim->d_fieldmask &
- (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0)
- return (0);
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
- if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
- 0, 0, XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return (error);
- }
-
- /*
- * We don't want to race with a quotaoff so take the quotaoff lock.
- * (We don't hold an inode lock, so there's nothing else to stop
- * a quotaoff from happening). (XXXThis doesn't currently happen
- * because we take the vfslock before calling xfs_qm_sysent).
- */
- mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
-
- /*
- * Get the dquot (locked), and join it to the transaction.
- * Allocate the dquot if this doesn't exist.
- */
- if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
- mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
- ASSERT(error != ENOENT);
- return (error);
- }
- xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET");
- xfs_trans_dqjoin(tp, dqp);
- ddq = &dqp->q_core;
-
- /*
- * Make sure that hardlimits are >= soft limits before changing.
- */
- hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
- be64_to_cpu(ddq->d_blk_hardlimit);
- soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
- be64_to_cpu(ddq->d_blk_softlimit);
- if (hard == 0 || hard >= soft) {
- ddq->d_blk_hardlimit = cpu_to_be64(hard);
- ddq->d_blk_softlimit = cpu_to_be64(soft);
- if (id == 0) {
- mp->m_quotainfo->qi_bhardlimit = hard;
- mp->m_quotainfo->qi_bsoftlimit = soft;
- }
- } else {
- qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft);
- }
- hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
- be64_to_cpu(ddq->d_rtb_hardlimit);
- soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
- be64_to_cpu(ddq->d_rtb_softlimit);
- if (hard == 0 || hard >= soft) {
- ddq->d_rtb_hardlimit = cpu_to_be64(hard);
- ddq->d_rtb_softlimit = cpu_to_be64(soft);
- if (id == 0) {
- mp->m_quotainfo->qi_rtbhardlimit = hard;
- mp->m_quotainfo->qi_rtbsoftlimit = soft;
- }
- } else {
- qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
- }
-
- hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
- (xfs_qcnt_t) newlim->d_ino_hardlimit :
- be64_to_cpu(ddq->d_ino_hardlimit);
- soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
- (xfs_qcnt_t) newlim->d_ino_softlimit :
- be64_to_cpu(ddq->d_ino_softlimit);
- if (hard == 0 || hard >= soft) {
- ddq->d_ino_hardlimit = cpu_to_be64(hard);
- ddq->d_ino_softlimit = cpu_to_be64(soft);
- if (id == 0) {
- mp->m_quotainfo->qi_ihardlimit = hard;
- mp->m_quotainfo->qi_isoftlimit = soft;
- }
- } else {
- qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
- }
-
- /*
- * Update warnings counter(s) if requested
- */
- if (newlim->d_fieldmask & FS_DQ_BWARNS)
- ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
- if (newlim->d_fieldmask & FS_DQ_IWARNS)
- ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
- if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
- ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
-
- if (id == 0) {
- /*
- * Timelimits for the super user set the relative time
- * the other users can be over quota for this file system.
- * If it is zero a default is used. Ditto for the default
- * soft and hard limit values (already done, above), and
- * for warnings.
- */
- if (newlim->d_fieldmask & FS_DQ_BTIMER) {
- mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
- ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
- }
- if (newlim->d_fieldmask & FS_DQ_ITIMER) {
- mp->m_quotainfo->qi_itimelimit = newlim->d_itimer;
- ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
- }
- if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
- mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
- ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
- }
- if (newlim->d_fieldmask & FS_DQ_BWARNS)
- mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns;
- if (newlim->d_fieldmask & FS_DQ_IWARNS)
- mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns;
- if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
- mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns;
- } else {
- /*
- * If the user is now over quota, start the timelimit.
- * The user will not be 'warned'.
- * Note that we keep the timers ticking, whether enforcement
- * is on or off. We don't really want to bother with iterating
- * over all ondisk dquots and turning the timers on/off.
- */
- xfs_qm_adjust_dqtimers(mp, ddq);
- }
- dqp->dq_flags |= XFS_DQ_DIRTY;
- xfs_trans_log_dquot(tp, dqp);
-
- xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
- xfs_trans_commit(tp, 0, NULL);
- xfs_qm_dqprint(dqp);
- xfs_qm_dqrele(dqp);
- mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-
- return (0);
-}
-
-STATIC int
-xfs_qm_scall_getquota(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- fs_disk_quota_t *out)
-{
- xfs_dquot_t *dqp;
- int error;
-
- /*
- * Try to get the dquot. We don't want it allocated on disk, so
- * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
- * exist, we'll get ENOENT back.
- */
- if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
- return (error);
- }
-
- xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS");
- /*
- * If everything's NULL, this dquot doesn't quite exist as far as
- * our utility programs are concerned.
- */
- if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
- xfs_qm_dqput(dqp);
- return XFS_ERROR(ENOENT);
- }
- /* xfs_qm_dqprint(dqp); */
- /*
- * Convert the disk dquot to the exportable format
- */
- xfs_qm_export_dquot(mp, &dqp->q_core, out);
- xfs_qm_dqput(dqp);
- return (error ? XFS_ERROR(EFAULT) : 0);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff_end(
- xfs_mount_t *mp,
- xfs_qoff_logitem_t *startqoff,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- xfs_qoff_logitem_t *qoffi;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
- if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
- 0, 0, XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return (error);
- }
-
- qoffi = xfs_trans_get_qoff_item(tp, startqoff,
- flags & XFS_ALL_QUOTA_ACCT);
- xfs_trans_log_quotaoff_item(tp, qoffi);
-
- /*
- * We have to make sure that the transaction is secure on disk before we
- * return and actually stop quota accounting. So, make it synchronous.
- * We don't care about quotoff's performance.
- */
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0, NULL);
- return (error);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
- xfs_mount_t *mp,
- xfs_qoff_logitem_t **qoffstartp,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- unsigned long s;
- xfs_qoff_logitem_t *qoffi=NULL;
- uint oldsbqflag=0;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
- if ((error = xfs_trans_reserve(tp, 0,
- sizeof(xfs_qoff_logitem_t) * 2 +
- mp->m_sb.sb_sectsize + 128,
- 0,
- 0,
- XFS_DEFAULT_LOG_COUNT))) {
- goto error0;
- }
-
- qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
- xfs_trans_log_quotaoff_item(tp, qoffi);
-
- s = XFS_SB_LOCK(mp);
- oldsbqflag = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
- XFS_SB_UNLOCK(mp, s);
-
- xfs_mod_sb(tp, XFS_SB_QFLAGS);
-
- /*
- * We have to make sure that the transaction is secure on disk before we
- * return and actually stop quota accounting. So, make it synchronous.
- * We don't care about quotoff's performance.
- */
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0, NULL);
-
-error0:
- if (error) {
- xfs_trans_cancel(tp, 0);
- /*
- * No one else is modifying sb_qflags, so this is OK.
- * We still hold the quotaofflock.
- */
- s = XFS_SB_LOCK(mp);
- mp->m_sb.sb_qflags = oldsbqflag;
- XFS_SB_UNLOCK(mp, s);
- }
- *qoffstartp = qoffi;
- return (error);
-}
-
-
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
- xfs_mount_t *mp,
- xfs_disk_dquot_t *src,
- struct fs_disk_quota *dst)
-{
- memset(dst, 0, sizeof(*dst));
- dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */
- dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
- dst->d_id = be32_to_cpu(src->d_id);
- dst->d_blk_hardlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
- dst->d_blk_softlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
- dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
- dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
- dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
- dst->d_icount = be64_to_cpu(src->d_icount);
- dst->d_btimer = be32_to_cpu(src->d_btimer);
- dst->d_itimer = be32_to_cpu(src->d_itimer);
- dst->d_iwarns = be16_to_cpu(src->d_iwarns);
- dst->d_bwarns = be16_to_cpu(src->d_bwarns);
- dst->d_rtb_hardlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
- dst->d_rtb_softlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
- dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
- dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
- dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
-
- /*
- * Internally, we don't reset all the timers when quota enforcement
- * gets turned off. No need to confuse the userlevel code,
- * so return zeroes in that case.
- */
- if (! XFS_IS_QUOTA_ENFORCED(mp)) {
- dst->d_btimer = 0;
- dst->d_itimer = 0;
- dst->d_rtbtimer = 0;
- }
-
-#ifdef DEBUG
- if (XFS_IS_QUOTA_ENFORCED(mp) && dst->d_id != 0) {
- if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
- (dst->d_blk_softlimit > 0)) {
- ASSERT(dst->d_btimer != 0);
- }
- if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
- (dst->d_ino_softlimit > 0)) {
- ASSERT(dst->d_itimer != 0);
- }
- }
-#endif
-}
-
-STATIC uint
-xfs_qm_import_qtype_flags(
- uint uflags)
-{
- uint oflags = 0;
-
- /*
- * Can't be more than one, or none.
- */
- if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ==
- (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
- ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ==
- (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ||
- ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ==
- (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ||
- ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0))
- return (0);
-
- oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0;
- oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0;
- oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0;
- return oflags;
-}
-
-STATIC uint
-xfs_qm_export_qtype_flags(
- uint flags)
-{
- /*
- * Can't be more than one, or none.
- */
- ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) !=
- (XFS_PROJ_QUOTA | XFS_USER_QUOTA));
- ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) !=
- (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA));
- ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) !=
- (XFS_USER_QUOTA | XFS_GROUP_QUOTA));
- ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0);
-
- return (flags & XFS_DQ_USER) ?
- XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
- XFS_PROJ_QUOTA : XFS_GROUP_QUOTA;
-}
-
-STATIC uint
-xfs_qm_import_flags(
- uint uflags)
-{
- uint flags = 0;
-
- if (uflags & XFS_QUOTA_UDQ_ACCT)
- flags |= XFS_UQUOTA_ACCT;
- if (uflags & XFS_QUOTA_PDQ_ACCT)
- flags |= XFS_PQUOTA_ACCT;
- if (uflags & XFS_QUOTA_GDQ_ACCT)
- flags |= XFS_GQUOTA_ACCT;
- if (uflags & XFS_QUOTA_UDQ_ENFD)
- flags |= XFS_UQUOTA_ENFD;
- if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
- flags |= XFS_OQUOTA_ENFD;
- return (flags);
-}
-
-
-STATIC uint
-xfs_qm_export_flags(
- uint flags)
-{
- uint uflags;
-
- uflags = 0;
- if (flags & XFS_UQUOTA_ACCT)
- uflags |= XFS_QUOTA_UDQ_ACCT;
- if (flags & XFS_PQUOTA_ACCT)
- uflags |= XFS_QUOTA_PDQ_ACCT;
- if (flags & XFS_GQUOTA_ACCT)
- uflags |= XFS_QUOTA_GDQ_ACCT;
- if (flags & XFS_UQUOTA_ENFD)
- uflags |= XFS_QUOTA_UDQ_ENFD;
- if (flags & (XFS_OQUOTA_ENFD)) {
- uflags |= (flags & XFS_GQUOTA_ACCT) ?
- XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD;
- }
- return (uflags);
-}
-
-
-/*
- * Go thru all the inodes in the file system, releasing their dquots.
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
- */
-void
-xfs_qm_dqrele_all_inodes(
- struct xfs_mount *mp,
- uint flags)
-{
- xfs_inode_t *ip, *topino;
- uint ireclaims;
- vnode_t *vp;
- boolean_t vnode_refd;
-
- ASSERT(mp->m_quotainfo);
-
- XFS_MOUNT_ILOCK(mp);
-again:
- ip = mp->m_inodes;
- if (ip == NULL) {
- XFS_MOUNT_IUNLOCK(mp);
- return;
- }
- do {
- /* Skip markers inserted by xfs_sync */
- if (ip->i_mount == NULL) {
- ip = ip->i_mnext;
- continue;
- }
- /* Root inode, rbmip and rsumip have associated blocks */
- if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- ip = ip->i_mnext;
- continue;
- }
- vp = XFS_ITOV_NULL(ip);
- if (!vp) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- ip = ip->i_mnext;
- continue;
- }
- vnode_refd = B_FALSE;
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
- ireclaims = mp->m_ireclaims;
- topino = mp->m_inodes;
- vp = vn_grab(vp);
- if (!vp)
- goto again;
-
- XFS_MOUNT_IUNLOCK(mp);
- /* XXX restart limit ? */
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- vnode_refd = B_TRUE;
- } else {
- ireclaims = mp->m_ireclaims;
- topino = mp->m_inodes;
- XFS_MOUNT_IUNLOCK(mp);
- }
-
- /*
- * We don't keep the mountlock across the dqrele() call,
- * since it can take a while..
- */
- if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
- xfs_qm_dqrele(ip->i_udquot);
- ip->i_udquot = NULL;
- }
- if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
- xfs_qm_dqrele(ip->i_gdquot);
- ip->i_gdquot = NULL;
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * Wait until we've dropped the ilock and mountlock to
- * do the vn_rele. Or be condemned to an eternity in the
- * inactive code in hell.
- */
- if (vnode_refd)
- VN_RELE(vp);
- XFS_MOUNT_ILOCK(mp);
- /*
- * If an inode was inserted or removed, we gotta
- * start over again.
- */
- if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
- /* XXX use a sentinel */
- goto again;
- }
- ip = ip->i_mnext;
- } while (ip != mp->m_inodes);
-
- XFS_MOUNT_IUNLOCK(mp);
-}
-
-/*------------------------------------------------------------------------*/
-#ifdef DEBUG
-/*
- * This contains all the test functions for XFS disk quotas.
- * Currently it does a quota accounting check. ie. it walks through
- * all inodes in the file system, calculating the dquot accounting fields,
- * and prints out any inconsistencies.
- */
-xfs_dqhash_t *qmtest_udqtab;
-xfs_dqhash_t *qmtest_gdqtab;
-int qmtest_hashmask;
-int qmtest_nfails;
-mutex_t qcheck_lock;
-
-#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
- (__psunsigned_t)(id)) & \
- (qmtest_hashmask - 1))
-
-#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \
- (qmtest_udqtab + \
- DQTEST_HASHVAL(mp, id)) : \
- (qmtest_gdqtab + \
- DQTEST_HASHVAL(mp, id)))
-
-#define DQTEST_LIST_PRINT(l, NXT, title) \
-{ \
- xfs_dqtest_t *dqp; int i = 0;\
- cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
- for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
- dqp = (xfs_dqtest_t *)dqp->NXT) { \
- cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
- ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
- dqp->d_bcount, dqp->d_icount); } \
-}
-
-typedef struct dqtest {
- xfs_dqmarker_t q_lists;
- xfs_dqhash_t *q_hash; /* the hashchain header */
- xfs_mount_t *q_mount; /* filesystem this relates to */
- xfs_dqid_t d_id; /* user id or group id */
- xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */
- xfs_qcnt_t d_icount; /* # inodes owned by the user */
-} xfs_dqtest_t;
-
-STATIC void
-xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
-{
- xfs_dquot_t *d;
- if (((d) = (h)->qh_next))
- (d)->HL_PREVP = &((dqp)->HL_NEXT);
- (dqp)->HL_NEXT = d;
- (dqp)->HL_PREVP = &((h)->qh_next);
- (h)->qh_next = (xfs_dquot_t *)dqp;
- (h)->qh_version++;
- (h)->qh_nelems++;
-}
-STATIC void
-xfs_qm_dqtest_print(
- xfs_dqtest_t *d)
-{
- cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------");
- cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id);
- cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount);
- cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)",
- d->d_bcount, (int)d->d_bcount);
- cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)",
- d->d_icount, (int)d->d_icount);
- cmn_err(CE_DEBUG, "---------------------------");
-}
-
-STATIC void
-xfs_qm_dqtest_failed(
- xfs_dqtest_t *d,
- xfs_dquot_t *dqp,
- char *reason,
- xfs_qcnt_t a,
- xfs_qcnt_t b,
- int error)
-{
- qmtest_nfails++;
- if (error)
- cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s",
- d->d_id, error, reason);
- else
- cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]",
- d->d_id, reason, (int)a, (int)b);
- xfs_qm_dqtest_print(d);
- if (dqp)
- xfs_qm_dqprint(dqp);
-}
-
-STATIC int
-xfs_dqtest_cmp2(
- xfs_dqtest_t *d,
- xfs_dquot_t *dqp)
-{
- int err = 0;
- if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) {
- xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
- be64_to_cpu(dqp->q_core.d_icount),
- d->d_icount, 0);
- err++;
- }
- if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) {
- xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
- be64_to_cpu(dqp->q_core.d_bcount),
- d->d_bcount, 0);
- err++;
- }
- if (dqp->q_core.d_blk_softlimit &&
- be64_to_cpu(dqp->q_core.d_bcount) >=
- be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
- if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
- cmn_err(CE_DEBUG,
- "%d [%s] [0x%p] BLK TIMER NOT STARTED",
- d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
- err++;
- }
- }
- if (dqp->q_core.d_ino_softlimit &&
- be64_to_cpu(dqp->q_core.d_icount) >=
- be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
- if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
- cmn_err(CE_DEBUG,
- "%d [%s] [0x%p] INO TIMER NOT STARTED",
- d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
- err++;
- }
- }
-#ifdef QUOTADEBUG
- if (!err) {
- cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked",
- d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
- }
-#endif
- return (err);
-}
-
-STATIC void
-xfs_dqtest_cmp(
- xfs_dqtest_t *d)
-{
- xfs_dquot_t *dqp;
- int error;
-
- /* xfs_qm_dqtest_print(d); */
- if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0,
- &dqp))) {
- xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
- return;
- }
- xfs_dqtest_cmp2(d, dqp);
- xfs_qm_dqput(dqp);
-}
-
-STATIC int
-xfs_qm_internalqcheck_dqget(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- xfs_dqtest_t **O_dq)
-{
- xfs_dqtest_t *d;
- xfs_dqhash_t *h;
-
- h = DQTEST_HASH(mp, id, type);
- for (d = (xfs_dqtest_t *) h->qh_next; d != NULL;
- d = (xfs_dqtest_t *) d->HL_NEXT) {
- /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */
- if (d->d_id == id && mp == d->q_mount) {
- *O_dq = d;
- return (0);
- }
- }
- d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
- d->dq_flags = type;
- d->d_id = id;
- d->q_mount = mp;
- d->q_hash = h;
- xfs_qm_hashinsert(h, d);
- *O_dq = d;
- return (0);
-}
-
-STATIC void
-xfs_qm_internalqcheck_get_dquots(
- xfs_mount_t *mp,
- xfs_dqid_t uid,
- xfs_dqid_t projid,
- xfs_dqid_t gid,
- xfs_dqtest_t **ud,
- xfs_dqtest_t **gd)
-{
- if (XFS_IS_UQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
- if (XFS_IS_GQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
- else if (XFS_IS_PQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
-}
-
-
-STATIC void
-xfs_qm_internalqcheck_dqadjust(
- xfs_inode_t *ip,
- xfs_dqtest_t *d)
-{
- d->d_icount++;
- d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
-}
-
-STATIC int
-xfs_qm_internalqcheck_adjust(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* not used */
- int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
- int *ubused, /* not used */
- void *dip, /* not used */
- int *res) /* bulkstat result code */
-{
- xfs_inode_t *ip;
- xfs_dqtest_t *ud, *gd;
- uint lock_flags;
- boolean_t ipreleased;
- int error;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
- *res = BULKSTAT_RV_NOTHING;
- qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n",
- (unsigned long long) ino,
- (unsigned long long) mp->m_sb.sb_uquotino,
- (unsigned long long) mp->m_sb.sb_gquotino);
- return XFS_ERROR(EINVAL);
- }
- ipreleased = B_FALSE;
- again:
- lock_flags = XFS_ILOCK_SHARED;
- if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) {
- *res = BULKSTAT_RV_NOTHING;
- return (error);
- }
-
- if (ip->i_d.di_mode == 0) {
- xfs_iput_new(ip, lock_flags);
- *res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(ENOENT);
- }
-
- /*
- * This inode can have blocks after eof which can get released
- * when we send it to inactive. Since we don't check the dquot
- * until the after all our calculations are done, we must get rid
- * of those now.
- */
- if (! ipreleased) {
- xfs_iput(ip, lock_flags);
- ipreleased = B_TRUE;
- goto again;
- }
- xfs_qm_internalqcheck_get_dquots(mp,
- (xfs_dqid_t) ip->i_d.di_uid,
- (xfs_dqid_t) ip->i_d.di_projid,
- (xfs_dqid_t) ip->i_d.di_gid,
- &ud, &gd);
- if (XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(ud);
- xfs_qm_internalqcheck_dqadjust(ip, ud);
- }
- if (XFS_IS_OQUOTA_ON(mp)) {
- ASSERT(gd);
- xfs_qm_internalqcheck_dqadjust(ip, gd);
- }
- xfs_iput(ip, lock_flags);
- *res = BULKSTAT_RV_DIDONE;
- return (0);
-}
-
-
-/* PRIVATE, debugging */
-int
-xfs_qm_internalqcheck(
- xfs_mount_t *mp)
-{
- xfs_ino_t lastino;
- int done, count;
- int i;
- xfs_dqtest_t *d, *e;
- xfs_dqhash_t *h1;
- int error;
-
- lastino = 0;
- qmtest_hashmask = 32;
- count = 5;
- done = 0;
- qmtest_nfails = 0;
-
- if (! XFS_IS_QUOTA_ON(mp))
- return XFS_ERROR(ESRCH);
-
- xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
- XFS_bflush(mp->m_ddev_targp);
- xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
- XFS_bflush(mp->m_ddev_targp);
-
- mutex_lock(&qcheck_lock);
- /* There should be absolutely no quota activity while this
- is going on. */
- qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
- sizeof(xfs_dqhash_t), KM_SLEEP);
- qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
- sizeof(xfs_dqhash_t), KM_SLEEP);
- do {
- /*
- * Iterate thru all the inodes in the file system,
- * adjusting the corresponding dquot counters
- */
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_internalqcheck_adjust, NULL,
- 0, NULL, BULKSTAT_FG_IGET, &done))) {
- break;
- }
- } while (! done);
- if (error) {
- cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
- }
- cmn_err(CE_DEBUG, "Checking results against system dquots");
- for (i = 0; i < qmtest_hashmask; i++) {
- h1 = &qmtest_udqtab[i];
- for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
- xfs_dqtest_cmp(d);
- e = (xfs_dqtest_t *) d->HL_NEXT;
- kmem_free(d, sizeof(xfs_dqtest_t));
- d = e;
- }
- h1 = &qmtest_gdqtab[i];
- for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
- xfs_dqtest_cmp(d);
- e = (xfs_dqtest_t *) d->HL_NEXT;
- kmem_free(d, sizeof(xfs_dqtest_t));
- d = e;
- }
- }
-
- if (qmtest_nfails) {
- cmn_err(CE_DEBUG, "******** quotacheck failed ********");
- cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails);
- } else {
- cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
- }
- kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
- kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
- mutex_unlock(&qcheck_lock);
- return (qmtest_nfails);
-}
-
-#endif /* DEBUG */
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
deleted file mode 100644
index b7ddd04aae3..00000000000
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE 10
-
-/* Number of dquots that fit in to a dquot block */
-#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
-
-#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \
- MR_UPDATE | MR_ACCESS) != 0)
-#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \
- MR_UPDATE) != 0)
-
-#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
-
-#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
-#define XFS_QI_UQIP(mp) ((mp)->m_quotainfo->qi_uquotaip)
-#define XFS_QI_GQIP(mp) ((mp)->m_quotainfo->qi_gquotaip)
-#define XFS_QI_DQCHUNKLEN(mp) ((mp)->m_quotainfo->qi_dqchunklen)
-#define XFS_QI_BTIMELIMIT(mp) ((mp)->m_quotainfo->qi_btimelimit)
-#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
-#define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit)
-#define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit)
-#define XFS_QI_RTBWARNLIMIT(mp) ((mp)->m_quotainfo->qi_rtbwarnlimit)
-#define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit)
-#define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock)
-
-#define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist)
-#define XFS_QI_MPLLOCK(mp) ((mp)->m_quotainfo->qi_dqlist.qh_lock)
-#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next)
-#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
-
-#define XQMLCK(h) (mutex_lock(&((h)->qh_lock)))
-#define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock)))
-#ifdef DEBUG
-struct xfs_dqhash;
-static inline int XQMISLCKD(struct xfs_dqhash *h)
-{
- if (mutex_trylock(&h->qh_lock)) {
- mutex_unlock(&h->qh_lock);
- return 0;
- }
- return 1;
-}
-#endif
-
-#define XFS_DQ_HASH_LOCK(h) XQMLCK(h)
-#define XFS_DQ_HASH_UNLOCK(h) XQMUNLCK(h)
-#define XFS_DQ_IS_HASH_LOCKED(h) XQMISLCKD(h)
-
-#define xfs_qm_mplist_lock(mp) XQMLCK(&(XFS_QI_MPL_LIST(mp)))
-#define xfs_qm_mplist_unlock(mp) XQMUNLCK(&(XFS_QI_MPL_LIST(mp)))
-#define XFS_QM_IS_MPLIST_LOCKED(mp) XQMISLCKD(&(XFS_QI_MPL_LIST(mp)))
-
-#define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist))
-#define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist))
-#define XFS_QM_IS_FREELIST_LOCKED(qm) XQMISLCKD(&((qm)->qm_dqfreelist))
-
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
- (__psunsigned_t)(id)) & \
- (xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \
- (xfs_Gqm->qm_usr_dqhtable + \
- XFS_DQ_HASHVAL(mp, id)) : \
- (xfs_Gqm->qm_grp_dqhtable + \
- XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? \
- XFS_IS_UQUOTA_ON(mp) : \
- XFS_IS_OQUOTA_ON(mp))
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
- !dqp->q_core.d_blk_hardlimit && \
- !dqp->q_core.d_blk_softlimit && \
- !dqp->q_core.d_rtb_hardlimit && \
- !dqp->q_core.d_rtb_softlimit && \
- !dqp->q_core.d_ino_hardlimit && \
- !dqp->q_core.d_ino_softlimit && \
- !dqp->q_core.d_bcount && \
- !dqp->q_core.d_rtbcount && \
- !dqp->q_core.d_icount)
-
-#define HL_PREVP dq_hashlist.ql_prevp
-#define HL_NEXT dq_hashlist.ql_next
-#define MPL_PREVP dq_mplist.ql_prevp
-#define MPL_NEXT dq_mplist.ql_next
-
-
-#define _LIST_REMOVE(h, dqp, PVP, NXT) \
- { \
- xfs_dquot_t *d; \
- if (((d) = (dqp)->NXT)) \
- (d)->PVP = (dqp)->PVP; \
- *((dqp)->PVP) = d; \
- (dqp)->NXT = NULL; \
- (dqp)->PVP = NULL; \
- (h)->qh_version++; \
- (h)->qh_nelems--; \
- }
-
-#define _LIST_INSERT(h, dqp, PVP, NXT) \
- { \
- xfs_dquot_t *d; \
- if (((d) = (h)->qh_next)) \
- (d)->PVP = &((dqp)->NXT); \
- (dqp)->NXT = d; \
- (dqp)->PVP = &((h)->qh_next); \
- (h)->qh_next = dqp; \
- (h)->qh_version++; \
- (h)->qh_nelems++; \
- }
-
-#define FOREACH_DQUOT_IN_MP(dqp, mp) \
- for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT)
-
-#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \
-for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
- (dqp) = (dqp)->dq_flnext)
-
-#define XQM_HASHLIST_INSERT(h, dqp) \
- _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
-
-#define XQM_FREELIST_INSERT(h, dqp) \
- xfs_qm_freelist_append(h, dqp)
-
-#define XQM_MPLIST_INSERT(h, dqp) \
- _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT)
-
-#define XQM_HASHLIST_REMOVE(h, dqp) \
- _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
-#define XQM_FREELIST_REMOVE(dqp) \
- xfs_qm_freelist_unlink(dqp)
-#define XQM_MPLIST_REMOVE(h, dqp) \
- { _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \
- XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; }
-
-#define XFS_DQ_IS_LOGITEM_INITD(dqp) ((dqp)->q_logitem.qli_dquot == (dqp))
-
-#define XFS_QM_DQP_TO_DQACCT(tp, dqp) (XFS_QM_ISUDQ(dqp) ? \
- (tp)->t_dqinfo->dqa_usrdquots : \
- (tp)->t_dqinfo->dqa_grpdquots)
-#define XFS_IS_SUSER_DQUOT(dqp) \
- (!((dqp)->q_core.d_id))
-
-#define XFS_PURGE_INODE(ip) \
- IRELE(ip);
-
-#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
- (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
- (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-#define DQFLAGTO_DIRTYSTR(d) (XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
-
-#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
deleted file mode 100644
index b08b3d9345b..00000000000
--- a/fs/xfs/support/debug.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "debug.h"
-#include "spin.h"
-#include <asm/page.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-
-static char message[256]; /* keep it off the stack */
-static DEFINE_SPINLOCK(xfs_err_lock);
-
-/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
-#define XFS_MAX_ERR_LEVEL 7
-#define XFS_ERR_MASK ((1 << 3) - 1)
-static const char * const err_level[XFS_MAX_ERR_LEVEL+1] =
- {KERN_EMERG, KERN_ALERT, KERN_CRIT,
- KERN_ERR, KERN_WARNING, KERN_NOTICE,
- KERN_INFO, KERN_DEBUG};
-
-void
-cmn_err(register int level, char *fmt, ...)
-{
- char *fp = fmt;
- int len;
- ulong flags;
- va_list ap;
-
- level &= XFS_ERR_MASK;
- if (level > XFS_MAX_ERR_LEVEL)
- level = XFS_MAX_ERR_LEVEL;
- spin_lock_irqsave(&xfs_err_lock,flags);
- va_start(ap, fmt);
- if (*fmt == '!') fp++;
- len = vsprintf(message, fp, ap);
- if (message[len-1] != '\n')
- strcat(message, "\n");
- printk("%s%s", err_level[level], message);
- va_end(ap);
- spin_unlock_irqrestore(&xfs_err_lock,flags);
-
- if (level == CE_PANIC)
- BUG();
-}
-
-void
-icmn_err(register int level, char *fmt, va_list ap)
-{
- ulong flags;
- int len;
-
- level &= XFS_ERR_MASK;
- if(level > XFS_MAX_ERR_LEVEL)
- level = XFS_MAX_ERR_LEVEL;
- spin_lock_irqsave(&xfs_err_lock,flags);
- len = vsprintf(message, fmt, ap);
- if (message[len-1] != '\n')
- strcat(message, "\n");
- spin_unlock_irqrestore(&xfs_err_lock,flags);
- printk("%s%s", err_level[level], message);
- if (level == CE_PANIC)
- BUG();
-}
-
-void
-assfail(char *expr, char *file, int line)
-{
- printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
- BUG();
-}
-
-#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
-unsigned long random(void)
-{
- static unsigned long RandomValue = 1;
- /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
- register long rv = RandomValue;
- register long lo;
- register long hi;
-
- hi = rv / 127773;
- lo = rv % 127773;
- rv = 16807 * lo - 2836 * hi;
- if (rv <= 0) rv += 2147483647;
- return RandomValue = rv;
-}
-#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
deleted file mode 100644
index e3bf58112e7..00000000000
--- a/fs/xfs/support/debug.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_DEBUG_H__
-#define __XFS_SUPPORT_DEBUG_H__
-
-#include <stdarg.h>
-
-#define CE_DEBUG 7 /* debug */
-#define CE_CONT 6 /* continuation */
-#define CE_NOTE 5 /* notice */
-#define CE_WARN 4 /* warning */
-#define CE_ALERT 1 /* alert */
-#define CE_PANIC 0 /* panic */
-
-extern void icmn_err(int, char *, va_list)
- __attribute__ ((format (printf, 2, 0)));
-extern void cmn_err(int, char *, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern void assfail(char *expr, char *f, int l);
-
-#define prdev(fmt,targ,args...) \
- printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
-
-#define ASSERT_ALWAYS(expr) \
- (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef DEBUG
-# define ASSERT(expr) ((void)0)
-#else
-# define ASSERT(expr) ASSERT_ALWAYS(expr)
-extern unsigned long random(void);
-#endif
-
-#ifndef STATIC
-# define STATIC static
-#endif
-
-#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
deleted file mode 100644
index 841aa4c15b8..00000000000
--- a/fs/xfs/support/ktrace.c
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <xfs.h>
-
-static kmem_zone_t *ktrace_hdr_zone;
-static kmem_zone_t *ktrace_ent_zone;
-static int ktrace_zentries;
-
-void
-ktrace_init(int zentries)
-{
- ktrace_zentries = zentries;
-
- ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
- "ktrace_hdr");
- ASSERT(ktrace_hdr_zone);
-
- ktrace_ent_zone = kmem_zone_init(ktrace_zentries
- * sizeof(ktrace_entry_t),
- "ktrace_ent");
- ASSERT(ktrace_ent_zone);
-}
-
-void
-ktrace_uninit(void)
-{
- kmem_cache_destroy(ktrace_hdr_zone);
- kmem_cache_destroy(ktrace_ent_zone);
-}
-
-/*
- * ktrace_alloc()
- *
- * Allocate a ktrace header and enough buffering for the given
- * number of entries.
- */
-ktrace_t *
-ktrace_alloc(int nentries, unsigned int __nocast sleep)
-{
- ktrace_t *ktp;
- ktrace_entry_t *ktep;
-
- ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
-
- if (ktp == (ktrace_t*)NULL) {
- /*
- * KM_SLEEP callers don't expect failure.
- */
- if (sleep & KM_SLEEP)
- panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
-
- return NULL;
- }
-
- /*
- * Special treatment for buffers with the ktrace_zentries entries
- */
- if (nentries == ktrace_zentries) {
- ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
- sleep);
- } else {
- ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)),
- sleep);
- }
-
- if (ktep == NULL) {
- /*
- * KM_SLEEP callers don't expect failure.
- */
- if (sleep & KM_SLEEP)
- panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
-
- kmem_free(ktp, sizeof(*ktp));
-
- return NULL;
- }
-
- spinlock_init(&(ktp->kt_lock), "kt_lock");
-
- ktp->kt_entries = ktep;
- ktp->kt_nentries = nentries;
- ktp->kt_index = 0;
- ktp->kt_rollover = 0;
- return ktp;
-}
-
-
-/*
- * ktrace_free()
- *
- * Free up the ktrace header and buffer. It is up to the caller
- * to ensure that no-one is referencing it.
- */
-void
-ktrace_free(ktrace_t *ktp)
-{
- int entries_size;
-
- if (ktp == (ktrace_t *)NULL)
- return;
-
- spinlock_destroy(&ktp->kt_lock);
-
- /*
- * Special treatment for the Vnode trace buffer.
- */
- if (ktp->kt_nentries == ktrace_zentries) {
- kmem_zone_free(ktrace_ent_zone, ktp->kt_entries);
- } else {
- entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
-
- kmem_free(ktp->kt_entries, entries_size);
- }
-
- kmem_zone_free(ktrace_hdr_zone, ktp);
-}
-
-
-/*
- * Enter the given values into the "next" entry in the trace buffer.
- * kt_index is always the index of the next entry to be filled.
- */
-void
-ktrace_enter(
- ktrace_t *ktp,
- void *val0,
- void *val1,
- void *val2,
- void *val3,
- void *val4,
- void *val5,
- void *val6,
- void *val7,
- void *val8,
- void *val9,
- void *val10,
- void *val11,
- void *val12,
- void *val13,
- void *val14,
- void *val15)
-{
- static DEFINE_SPINLOCK(wrap_lock);
- unsigned long flags;
- int index;
- ktrace_entry_t *ktep;
-
- ASSERT(ktp != NULL);
-
- /*
- * Grab an entry by pushing the index up to the next one.
- */
- spin_lock_irqsave(&wrap_lock, flags);
- index = ktp->kt_index;
- if (++ktp->kt_index == ktp->kt_nentries)
- ktp->kt_index = 0;
- spin_unlock_irqrestore(&wrap_lock, flags);
-
- if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
- ktp->kt_rollover = 1;
-
- ASSERT((index >= 0) && (index < ktp->kt_nentries));
-
- ktep = &(ktp->kt_entries[index]);
-
- ktep->val[0] = val0;
- ktep->val[1] = val1;
- ktep->val[2] = val2;
- ktep->val[3] = val3;
- ktep->val[4] = val4;
- ktep->val[5] = val5;
- ktep->val[6] = val6;
- ktep->val[7] = val7;
- ktep->val[8] = val8;
- ktep->val[9] = val9;
- ktep->val[10] = val10;
- ktep->val[11] = val11;
- ktep->val[12] = val12;
- ktep->val[13] = val13;
- ktep->val[14] = val14;
- ktep->val[15] = val15;
-}
-
-/*
- * Return the number of entries in the trace buffer.
- */
-int
-ktrace_nentries(
- ktrace_t *ktp)
-{
- if (ktp == NULL) {
- return 0;
- }
-
- return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index);
-}
-
-/*
- * ktrace_first()
- *
- * This is used to find the start of the trace buffer.
- * In conjunction with ktrace_next() it can be used to
- * iterate through the entire trace buffer. This code does
- * not do any locking because it is assumed that it is called
- * from the debugger.
- *
- * The caller must pass in a pointer to a ktrace_snap
- * structure in which we will keep some state used to
- * iterate through the buffer. This state must not touched
- * by any code outside of this module.
- */
-ktrace_entry_t *
-ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp)
-{
- ktrace_entry_t *ktep;
- int index;
- int nentries;
-
- if (ktp->kt_rollover)
- index = ktp->kt_index;
- else
- index = 0;
-
- ktsp->ks_start = index;
- ktep = &(ktp->kt_entries[index]);
-
- nentries = ktrace_nentries(ktp);
- index++;
- if (index < nentries) {
- ktsp->ks_index = index;
- } else {
- ktsp->ks_index = 0;
- if (index > nentries)
- ktep = NULL;
- }
- return ktep;
-}
-
-/*
- * ktrace_next()
- *
- * This is used to iterate through the entries of the given
- * trace buffer. The caller must pass in the ktrace_snap_t
- * structure initialized by ktrace_first(). The return value
- * will be either a pointer to the next ktrace_entry or NULL
- * if all of the entries have been traversed.
- */
-ktrace_entry_t *
-ktrace_next(
- ktrace_t *ktp,
- ktrace_snap_t *ktsp)
-{
- int index;
- ktrace_entry_t *ktep;
-
- index = ktsp->ks_index;
- if (index == ktsp->ks_start) {
- ktep = NULL;
- } else {
- ktep = &ktp->kt_entries[index];
- }
-
- index++;
- if (index == ktrace_nentries(ktp)) {
- ktsp->ks_index = 0;
- } else {
- ktsp->ks_index = index;
- }
-
- return ktep;
-}
-
-/*
- * ktrace_skip()
- *
- * Skip the next "count" entries and return the entry after that.
- * Return NULL if this causes us to iterate past the beginning again.
- */
-ktrace_entry_t *
-ktrace_skip(
- ktrace_t *ktp,
- int count,
- ktrace_snap_t *ktsp)
-{
- int index;
- int new_index;
- ktrace_entry_t *ktep;
- int nentries = ktrace_nentries(ktp);
-
- index = ktsp->ks_index;
- new_index = index + count;
- while (new_index >= nentries) {
- new_index -= nentries;
- }
- if (index == ktsp->ks_start) {
- /*
- * We've iterated around to the start, so we're done.
- */
- ktep = NULL;
- } else if ((new_index < index) && (index < ktsp->ks_index)) {
- /*
- * We've skipped past the start again, so we're done.
- */
- ktep = NULL;
- ktsp->ks_index = ktsp->ks_start;
- } else {
- ktep = &(ktp->kt_entries[new_index]);
- new_index++;
- if (new_index == nentries) {
- ktsp->ks_index = 0;
- } else {
- ktsp->ks_index = new_index;
- }
- }
- return ktep;
-}
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
deleted file mode 100644
index 0d73216287c..00000000000
--- a/fs/xfs/support/ktrace.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_KTRACE_H__
-#define __XFS_SUPPORT_KTRACE_H__
-
-#include <spin.h>
-
-/*
- * Trace buffer entry structure.
- */
-typedef struct ktrace_entry {
- void *val[16];
-} ktrace_entry_t;
-
-/*
- * Trace buffer header structure.
- */
-typedef struct ktrace {
- lock_t kt_lock; /* mutex to guard counters */
- int kt_nentries; /* number of entries in trace buf */
- int kt_index; /* current index in entries */
- int kt_rollover;
- ktrace_entry_t *kt_entries; /* buffer of entries */
-} ktrace_t;
-
-/*
- * Trace buffer snapshot structure.
- */
-typedef struct ktrace_snap {
- int ks_start; /* kt_index at time of snap */
- int ks_index; /* current index */
-} ktrace_snap_t;
-
-
-#ifdef CONFIG_XFS_TRACE
-
-extern void ktrace_init(int zentries);
-extern void ktrace_uninit(void);
-
-extern ktrace_t *ktrace_alloc(int, unsigned int __nocast);
-extern void ktrace_free(ktrace_t *);
-
-extern void ktrace_enter(
- ktrace_t *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *,
- void *);
-
-extern ktrace_entry_t *ktrace_first(ktrace_t *, ktrace_snap_t *);
-extern int ktrace_nentries(ktrace_t *);
-extern ktrace_entry_t *ktrace_next(ktrace_t *, ktrace_snap_t *);
-extern ktrace_entry_t *ktrace_skip(ktrace_t *, int, ktrace_snap_t *);
-
-#else
-#define ktrace_init(x) do { } while (0)
-#define ktrace_uninit() do { } while (0)
-#endif /* CONFIG_XFS_TRACE */
-
-#endif /* __XFS_SUPPORT_KTRACE_H__ */
diff --git a/fs/xfs/support/move.c b/fs/xfs/support/move.c
deleted file mode 100644
index caefa17b80f..00000000000
--- a/fs/xfs/support/move.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <xfs.h>
-
-/* Read from kernel buffer at src to user/kernel buffer defined
- * by the uio structure. Advance the pointer in the uio struct
- * as we go.
- */
-int
-uio_read(caddr_t src, size_t len, struct uio *uio)
-{
- size_t count;
-
- if (!len || !uio->uio_resid)
- return 0;
-
- count = uio->uio_iov->iov_len;
- if (!count)
- return 0;
- if (count > len)
- count = len;
-
- if (uio->uio_segflg == UIO_USERSPACE) {
- if (copy_to_user(uio->uio_iov->iov_base, src, count))
- return EFAULT;
- } else {
- ASSERT(uio->uio_segflg == UIO_SYSSPACE);
- memcpy(uio->uio_iov->iov_base, src, count);
- }
-
- uio->uio_iov->iov_base = (void*)((char*)uio->uio_iov->iov_base + count);
- uio->uio_iov->iov_len -= count;
- uio->uio_offset += count;
- uio->uio_resid -= count;
- return 0;
-}
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
deleted file mode 100644
index 97a2498d2da..00000000000
--- a/fs/xfs/support/move.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Portions Copyright (c) 1982, 1986, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#ifndef __XFS_SUPPORT_MOVE_H__
-#define __XFS_SUPPORT_MOVE_H__
-
-#include <linux/uio.h>
-#include <asm/uaccess.h>
-
-/* Segment flag values. */
-enum uio_seg {
- UIO_USERSPACE, /* from user data space */
- UIO_SYSSPACE, /* from system space */
-};
-
-struct uio {
- struct iovec *uio_iov; /* pointer to array of iovecs */
- int uio_iovcnt; /* number of iovecs in array */
- xfs_off_t uio_offset; /* offset in file this uio corresponds to */
- int uio_resid; /* residual i/o count */
- enum uio_seg uio_segflg; /* see above */
-};
-
-typedef struct uio uio_t;
-typedef struct iovec iovec_t;
-
-extern int uio_read (caddr_t, size_t, uio_t *);
-
-#endif /* __XFS_SUPPORT_MOVE_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/time.h
index 387e695a184..387e695a184 100644
--- a/fs/xfs/linux-2.6/time.h
+++ b/fs/xfs/time.h
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/uuid.c
index a3d565a6773..b83f76b6d41 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/uuid.c
@@ -17,17 +17,6 @@
*/
#include <xfs.h>
-static mutex_t uuid_monitor;
-static int uuid_table_size;
-static uuid_t *uuid_table;
-
-void
-uuid_init(void)
-{
- mutex_init(&uuid_monitor);
-}
-
-
/* IRIX interpretation of an uuid_t */
typedef struct {
__be32 uu_timelow;
@@ -50,13 +39,7 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
be16_to_cpu(uup->uu_timemid);
- fsid[1] = be16_to_cpu(uup->uu_timelow);
-}
-
-void
-uuid_create_nil(uuid_t *uuid)
-{
- memset(uuid, 0, sizeof(*uuid));
+ fsid[1] = be32_to_cpu(uup->uu_timelow);
}
int
@@ -78,64 +61,3 @@ uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
{
return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
}
-
-/*
- * Given a 128-bit uuid, return a 64-bit value by adding the top and bottom
- * 64-bit words. NOTE: This function can not be changed EVER. Although
- * brain-dead, some applications depend on this 64-bit value remaining
- * persistent. Specifically, DMI vendors store the value as a persistent
- * filehandle.
- */
-__uint64_t
-uuid_hash64(uuid_t *uuid)
-{
- __uint64_t *sp = (__uint64_t *)uuid;
-
- return sp[0] + sp[1];
-}
-
-int
-uuid_table_insert(uuid_t *uuid)
-{
- int i, hole;
-
- mutex_lock(&uuid_monitor);
- for (i = 0, hole = -1; i < uuid_table_size; i++) {
- if (uuid_is_nil(&uuid_table[i])) {
- hole = i;
- continue;
- }
- if (uuid_equal(uuid, &uuid_table[i])) {
- mutex_unlock(&uuid_monitor);
- return 0;
- }
- }
- if (hole < 0) {
- uuid_table = kmem_realloc(uuid_table,
- (uuid_table_size + 1) * sizeof(*uuid_table),
- uuid_table_size * sizeof(*uuid_table),
- KM_SLEEP);
- hole = uuid_table_size++;
- }
- uuid_table[hole] = *uuid;
- mutex_unlock(&uuid_monitor);
- return 1;
-}
-
-void
-uuid_table_remove(uuid_t *uuid)
-{
- int i;
-
- mutex_lock(&uuid_monitor);
- for (i = 0; i < uuid_table_size; i++) {
- if (uuid_is_nil(&uuid_table[i]))
- continue;
- if (!uuid_equal(uuid, &uuid_table[i]))
- continue;
- uuid_create_nil(&uuid_table[i]);
- break;
- }
- ASSERT(i < uuid_table_size);
- mutex_unlock(&uuid_monitor);
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/uuid.h
index b6f5922199b..104db0f3bed 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/uuid.h
@@ -22,13 +22,14 @@ typedef struct {
unsigned char __u_bits[16];
} uuid_t;
-extern void uuid_init(void);
-extern void uuid_create_nil(uuid_t *uuid);
extern int uuid_is_nil(uuid_t *uuid);
extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-extern __uint64_t uuid_hash64(uuid_t *uuid);
-extern int uuid_table_insert(uuid_t *uuid);
-extern void uuid_table_remove(uuid_t *uuid);
+
+static inline void
+uuid_copy(uuid_t *dst, uuid_t *src)
+{
+ memcpy(dst, src, sizeof(uuid_t));
+}
#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 1a48dbb902a..a742c47f7d5 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -17,5 +17,18 @@
*/
#ifndef __XFS_H__
#define __XFS_H__
-#include <linux-2.6/xfs_linux.h>
+
+#ifdef CONFIG_XFS_DEBUG
+#define STATIC
+#define DEBUG 1
+#define XFS_BUF_LOCK_TRACKING 1
+#endif
+
+#ifdef CONFIG_XFS_WARN
+#define XFS_WARN 1
+#endif
+
+
+#include "xfs_linux.h"
+
#endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 4ff0f4e41c6..6888ad886ff 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2008, Christoph Hellwig
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
@@ -16,912 +16,291 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_btree.h"
#include "xfs_acl.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
-
-#include <linux/capability.h>
+#include "xfs_trace.h"
+#include <linux/slab.h>
+#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
-STATIC int xfs_acl_setmode(vnode_t *, xfs_acl_t *, int *);
-STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_endian(xfs_acl_t *);
-STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
-STATIC int xfs_acl_invalid(xfs_acl_t *);
-STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_attr(vnode_t *, xfs_acl_t *, int, int, int *);
-STATIC void xfs_acl_set_attr(vnode_t *, xfs_acl_t *, int, int *);
-STATIC int xfs_acl_allow_set(vnode_t *, int);
-
-kmem_zone_t *xfs_acl_zone;
-
-
-/*
- * Test for existence of access ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_access(
- vnode_t *vp)
-{
- int error;
-
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
/*
- * Test for existence of default ACL attribute as efficiently as possible.
+ * Locking scheme:
+ * - all ACL updates are protected by inode->i_mutex, which is taken before
+ * calling into this file.
*/
-int
-xfs_acl_vhasacl_default(
- vnode_t *vp)
-{
- int error;
- if (!VN_ISDIR(vp))
- return 0;
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
-
-/*
- * Convert from extended attribute representation to in-memory for XFS.
- */
-STATIC int
-posix_acl_xattr_to_xfs(
- posix_acl_xattr_header *src,
- size_t size,
- xfs_acl_t *dest)
+STATIC struct posix_acl *
+xfs_acl_from_disk(
+ struct xfs_acl *aclp,
+ int max_entries)
{
- posix_acl_xattr_entry *src_entry;
- xfs_acl_entry_t *dest_entry;
- int n;
-
- if (!src || !dest)
- return EINVAL;
-
- if (size < sizeof(posix_acl_xattr_header))
- return EINVAL;
+ struct posix_acl_entry *acl_e;
+ struct posix_acl *acl;
+ struct xfs_acl_entry *ace;
+ unsigned int count, i;
- if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return EOPNOTSUPP;
+ count = be32_to_cpu(aclp->acl_cnt);
+ if (count > max_entries)
+ return ERR_PTR(-EFSCORRUPTED);
- memset(dest, 0, sizeof(xfs_acl_t));
- dest->acl_cnt = posix_acl_xattr_count(size);
- if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES)
- return EINVAL;
+ acl = posix_acl_alloc(count, GFP_KERNEL);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
- /*
- * acl_set_file(3) may request that we set default ACLs with
- * zero length -- defend (gracefully) against that here.
- */
- if (!dest->acl_cnt)
- return 0;
+ for (i = 0; i < count; i++) {
+ acl_e = &acl->a_entries[i];
+ ace = &aclp->acl_entry[i];
- src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src));
- dest_entry = &dest->acl_entry[0];
+ /*
+ * The tag is 32 bits on disk and 16 bits in core.
+ *
+ * Because every access to it goes through the core
+ * format first this is not a problem.
+ */
+ acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+ acl_e->e_perm = be16_to_cpu(ace->ae_perm);
- for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) {
- dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm);
- if (_ACL_PERM_INVALID(dest_entry->ae_perm))
- return EINVAL;
- dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag);
- switch(dest_entry->ae_tag) {
+ switch (acl_e->e_tag) {
case ACL_USER:
+ acl_e->e_uid = xfs_uid_to_kuid(be32_to_cpu(ace->ae_id));
+ break;
case ACL_GROUP:
- dest_entry->ae_id = le32_to_cpu(src_entry->e_id);
+ acl_e->e_gid = xfs_gid_to_kgid(be32_to_cpu(ace->ae_id));
break;
case ACL_USER_OBJ:
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
- dest_entry->ae_id = ACL_UNDEFINED_ID;
break;
default:
- return EINVAL;
+ goto fail;
}
}
- if (xfs_acl_invalid(dest))
- return EINVAL;
+ return acl;
- return 0;
+fail:
+ posix_acl_release(acl);
+ return ERR_PTR(-EINVAL);
}
-/*
- * Comparison function called from xfs_sort().
- * Primary key is ae_tag, secondary key is ae_id.
- */
-STATIC int
-xfs_acl_entry_compare(
- const void *va,
- const void *vb)
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
{
- xfs_acl_entry_t *a = (xfs_acl_entry_t *)va,
- *b = (xfs_acl_entry_t *)vb;
+ const struct posix_acl_entry *acl_e;
+ struct xfs_acl_entry *ace;
+ int i;
- if (a->ae_tag == b->ae_tag)
- return (a->ae_id - b->ae_id);
- return (a->ae_tag - b->ae_tag);
-}
+ aclp->acl_cnt = cpu_to_be32(acl->a_count);
+ for (i = 0; i < acl->a_count; i++) {
+ ace = &aclp->acl_entry[i];
+ acl_e = &acl->a_entries[i];
-/*
- * Convert from in-memory XFS to extended attribute representation.
- */
-STATIC int
-posix_acl_xfs_to_xattr(
- xfs_acl_t *src,
- posix_acl_xattr_header *dest,
- size_t size)
-{
- int n;
- size_t new_size = posix_acl_xattr_size(src->acl_cnt);
- posix_acl_xattr_entry *dest_entry;
- xfs_acl_entry_t *src_entry;
-
- if (size < new_size)
- return -ERANGE;
-
- /* Need to sort src XFS ACL by <ae_tag,ae_id> */
- xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
- xfs_acl_entry_compare);
-
- dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
- dest_entry = &dest->a_entries[0];
- src_entry = &src->acl_entry[0];
- for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) {
- dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm);
- if (_ACL_PERM_INVALID(src_entry->ae_perm))
- return -EINVAL;
- dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag);
- switch (src_entry->ae_tag) {
+ ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+ switch (acl_e->e_tag) {
case ACL_USER:
+ ace->ae_id = cpu_to_be32(xfs_kuid_to_uid(acl_e->e_uid));
+ break;
case ACL_GROUP:
- dest_entry->e_id = cpu_to_le32(src_entry->ae_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
+ ace->ae_id = cpu_to_be32(xfs_kgid_to_gid(acl_e->e_gid));
break;
default:
- return -EINVAL;
- }
- }
- return new_size;
-}
-
-int
-xfs_acl_vget(
- vnode_t *vp,
- void *acl,
- size_t size,
- int kind)
-{
- int error;
- xfs_acl_t *xfs_acl = NULL;
- posix_acl_xattr_header *ext_acl = acl;
- int flags = 0;
-
- VN_HOLD(vp);
- if(size) {
- if (!(_ACL_ALLOC(xfs_acl))) {
- error = ENOMEM;
- goto out;
+ ace->ae_id = cpu_to_be32(ACL_UNDEFINED_ID);
+ break;
}
- memset(xfs_acl, 0, sizeof(xfs_acl_t));
- } else
- flags = ATTR_KERNOVAL;
- xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error);
- if (error)
- goto out;
-
- if (!size) {
- error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES);
- } else {
- if (xfs_acl_invalid(xfs_acl)) {
- error = EINVAL;
- goto out;
- }
- if (kind == _ACL_TYPE_ACCESS) {
- vattr_t va;
-
- va.va_mask = XFS_AT_MODE;
- VOP_GETATTR(vp, &va, 0, sys_cred, error);
- if (error)
- goto out;
- xfs_acl_sync_mode(va.va_mode, xfs_acl);
- }
- error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
+ ace->ae_perm = cpu_to_be16(acl_e->e_perm);
}
-out:
- VN_RELE(vp);
- if(xfs_acl)
- _ACL_FREE(xfs_acl);
- return -error;
}
-int
-xfs_acl_vremove(
- vnode_t *vp,
- int kind)
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
{
- int error;
-
- VN_HOLD(vp);
- error = xfs_acl_allow_set(vp, kind);
- if (!error) {
- VOP_ATTR_REMOVE(vp, kind == _ACL_TYPE_DEFAULT?
- SGI_ACL_DEFAULT: SGI_ACL_FILE,
- ATTR_ROOT, sys_cred, error);
- if (error == ENOATTR)
- error = 0; /* 'scool */
+ struct xfs_inode *ip = XFS_I(inode);
+ struct posix_acl *acl = NULL;
+ struct xfs_acl *xfs_acl;
+ unsigned char *ea_name;
+ int error;
+ int len;
+
+ trace_xfs_get_acl(ip);
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ea_name = SGI_ACL_FILE;
+ break;
+ case ACL_TYPE_DEFAULT:
+ ea_name = SGI_ACL_DEFAULT;
+ break;
+ default:
+ BUG();
}
- VN_RELE(vp);
- return -error;
-}
-int
-xfs_acl_vset(
- vnode_t *vp,
- void *acl,
- size_t size,
- int kind)
-{
- posix_acl_xattr_header *ext_acl = acl;
- xfs_acl_t *xfs_acl;
- int error;
- int basicperms = 0; /* more than std unix perms? */
-
- if (!acl)
- return -EINVAL;
-
- if (!(_ACL_ALLOC(xfs_acl)))
- return -ENOMEM;
+ /*
+ * If we have a cached ACLs value just return it, not need to
+ * go out to the disk.
+ */
+ len = XFS_ACL_MAX_SIZE(ip->i_mount);
+ xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
+ if (!xfs_acl)
+ return ERR_PTR(-ENOMEM);
- error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl);
+ error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+ &len, ATTR_ROOT);
if (error) {
- _ACL_FREE(xfs_acl);
- return -error;
- }
- if (!xfs_acl->acl_cnt) {
- _ACL_FREE(xfs_acl);
- return 0;
+ /*
+ * If the attribute doesn't exist make sure we have a negative
+ * cache entry, for any other error assume it is transient and
+ * leave the cache entry as ACL_NOT_CACHED.
+ */
+ if (error == -ENOATTR)
+ goto out_update_cache;
+ goto out;
}
- VN_HOLD(vp);
- error = xfs_acl_allow_set(vp, kind);
- if (error)
+ acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount));
+ if (IS_ERR(acl))
goto out;
- /* Incoming ACL exists, set file mode based on its value */
- if (kind == _ACL_TYPE_ACCESS)
- xfs_acl_setmode(vp, xfs_acl, &basicperms);
-
- /*
- * If we have more than std unix permissions, set up the actual attr.
- * Otherwise, delete any existing attr. This prevents us from
- * having actual attrs for permissions that can be stored in the
- * standard permission bits.
- */
- if (!basicperms) {
- xfs_acl_set_attr(vp, xfs_acl, kind, &error);
- } else {
- xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
- }
-
+out_update_cache:
+ set_cached_acl(inode, type, acl);
out:
- VN_RELE(vp);
- _ACL_FREE(xfs_acl);
- return -error;
+ kmem_free(xfs_acl);
+ return acl;
}
-int
-xfs_acl_iaccess(
- xfs_inode_t *ip,
- mode_t mode,
- cred_t *cr)
+STATIC int
+__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
{
- xfs_acl_t *acl;
- int rval;
-
- if (!(_ACL_ALLOC(acl)))
- return -1;
-
- /* If the file has no ACL return -1. */
- rval = sizeof(xfs_acl_t);
- if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE,
- (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) {
- _ACL_FREE(acl);
- return -1;
- }
- xfs_acl_get_endian(acl);
+ struct xfs_inode *ip = XFS_I(inode);
+ unsigned char *ea_name;
+ int error;
- /* If the file has an empty ACL return -1. */
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) {
- _ACL_FREE(acl);
- return -1;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ea_name = SGI_ACL_FILE;
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ ea_name = SGI_ACL_DEFAULT;
+ break;
+ default:
+ return -EINVAL;
}
- /* Synchronize ACL with mode bits */
- xfs_acl_sync_mode(ip->i_d.di_mode, acl);
+ if (acl) {
+ struct xfs_acl *xfs_acl;
+ int len = XFS_ACL_MAX_SIZE(ip->i_mount);
- rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr);
- _ACL_FREE(acl);
- return rval;
-}
+ xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
+ if (!xfs_acl)
+ return -ENOMEM;
-STATIC int
-xfs_acl_allow_set(
- vnode_t *vp,
- int kind)
-{
- vattr_t va;
- int error;
-
- if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
- return EPERM;
- if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp))
- return ENOTDIR;
- if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
- return EROFS;
- va.va_mask = XFS_AT_UID;
- VOP_GETATTR(vp, &va, 0, NULL, error);
- if (error)
- return error;
- if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
- return EPERM;
- return error;
-}
-
-/*
- * The access control process to determine the access permission:
- * if uid == file owner id, use the file owner bits.
- * if gid == file owner group id, use the file group bits.
- * scan ACL for a maching user or group, and use matched entry
- * permission. Use total permissions of all matching group entries,
- * until all acl entries are exhausted. The final permission produced
- * by matching acl entry or entries needs to be & with group permission.
- * if not owner, owning group, or matching entry in ACL, use file
- * other bits.
- */
-STATIC int
-xfs_acl_capability_check(
- mode_t mode,
- cred_t *cr)
-{
- if ((mode & ACL_READ) && !capable_cred(cr, CAP_DAC_READ_SEARCH))
- return EACCES;
- if ((mode & ACL_WRITE) && !capable_cred(cr, CAP_DAC_OVERRIDE))
- return EACCES;
- if ((mode & ACL_EXECUTE) && !capable_cred(cr, CAP_DAC_OVERRIDE))
- return EACCES;
-
- return 0;
-}
+ xfs_acl_to_disk(xfs_acl, acl);
-/*
- * Note: cr is only used here for the capability check if the ACL test fails.
- * It is not used to find out the credentials uid or groups etc, as was
- * done in IRIX. It is assumed that the uid and groups for the current
- * thread are taken from "current" instead of the cr parameter.
- */
-STATIC int
-xfs_acl_access(
- uid_t fuid,
- gid_t fgid,
- xfs_acl_t *fap,
- mode_t md,
- cred_t *cr)
-{
- xfs_acl_entry_t matched;
- int i, allows;
- int maskallows = -1; /* true, but not 1, either */
- int seen_userobj = 0;
+ /* subtract away the unused acl entries */
+ len -= sizeof(struct xfs_acl_entry) *
+ (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
- matched.ae_tag = 0; /* Invalid type */
- matched.ae_perm = 0;
- md >>= 6; /* Normalize the bits for comparison */
+ error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
+ len, ATTR_ROOT);
- for (i = 0; i < fap->acl_cnt; i++) {
+ kmem_free(xfs_acl);
+ } else {
/*
- * Break out if we've got a user_obj entry or
- * a user entry and the mask (and have processed USER_OBJ)
+ * A NULL ACL argument means we want to remove the ACL.
*/
- if (matched.ae_tag == ACL_USER_OBJ)
- break;
- if (matched.ae_tag == ACL_USER) {
- if (maskallows != -1 && seen_userobj)
- break;
- if (fap->acl_entry[i].ae_tag != ACL_MASK &&
- fap->acl_entry[i].ae_tag != ACL_USER_OBJ)
- continue;
- }
- /* True if this entry allows the requested access */
- allows = ((fap->acl_entry[i].ae_perm & md) == md);
+ error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
- switch (fap->acl_entry[i].ae_tag) {
- case ACL_USER_OBJ:
- seen_userobj = 1;
- if (fuid != current->fsuid)
- continue;
- matched.ae_tag = ACL_USER_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_USER:
- if (fap->acl_entry[i].ae_id != current->fsuid)
- continue;
- matched.ae_tag = ACL_USER;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP_OBJ:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fgid))
- continue;
- matched.ae_tag = ACL_GROUP_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fap->acl_entry[i].ae_id))
- continue;
- matched.ae_tag = ACL_GROUP;
- matched.ae_perm = allows;
- break;
- case ACL_MASK:
- maskallows = allows;
- break;
- case ACL_OTHER:
- if (matched.ae_tag != 0)
- continue;
- matched.ae_tag = ACL_OTHER;
- matched.ae_perm = allows;
- break;
- }
- }
- /*
- * First possibility is that no matched entry allows access.
- * The capability to override DAC may exist, so check for it.
- */
- switch (matched.ae_tag) {
- case ACL_OTHER:
- case ACL_USER_OBJ:
- if (matched.ae_perm)
- return 0;
- break;
- case ACL_USER:
- case ACL_GROUP_OBJ:
- case ACL_GROUP:
- if (maskallows && matched.ae_perm)
- return 0;
- break;
- case 0:
- break;
+ /*
+ * If the attribute didn't exist to start with that's fine.
+ */
+ if (error == -ENOATTR)
+ error = 0;
}
- return xfs_acl_capability_check(md, cr);
+ if (!error)
+ set_cached_acl(inode, type, acl);
+ return error;
}
-/*
- * ACL validity checker.
- * This acl validation routine checks each ACL entry read in makes sense.
- */
-STATIC int
-xfs_acl_invalid(
- xfs_acl_t *aclp)
+static int
+xfs_set_mode(struct inode *inode, umode_t mode)
{
- xfs_acl_entry_t *entry, *e;
- int user = 0, group = 0, other = 0, mask = 0;
- int mask_required = 0;
- int i, j;
+ int error = 0;
- if (!aclp)
- goto acl_invalid;
+ if (mode != inode->i_mode) {
+ struct iattr iattr;
- if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES)
- goto acl_invalid;
-
- for (i = 0; i < aclp->acl_cnt; i++) {
- entry = &aclp->acl_entry[i];
- switch (entry->ae_tag) {
- case ACL_USER_OBJ:
- if (user++)
- goto acl_invalid;
- break;
- case ACL_GROUP_OBJ:
- if (group++)
- goto acl_invalid;
- break;
- case ACL_OTHER:
- if (other++)
- goto acl_invalid;
- break;
- case ACL_USER:
- case ACL_GROUP:
- for (j = i + 1; j < aclp->acl_cnt; j++) {
- e = &aclp->acl_entry[j];
- if (e->ae_id == entry->ae_id &&
- e->ae_tag == entry->ae_tag)
- goto acl_invalid;
- }
- mask_required++;
- break;
- case ACL_MASK:
- if (mask++)
- goto acl_invalid;
- break;
- default:
- goto acl_invalid;
- }
- }
- if (!user || !group || !other || (mask_required && !mask))
- goto acl_invalid;
- else
- return 0;
-acl_invalid:
- return EINVAL;
-}
+ iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
+ iattr.ia_mode = mode;
+ iattr.ia_ctime = current_fs_time(inode->i_sb);
-/*
- * Do ACL endian conversion.
- */
-STATIC void
-xfs_acl_get_endian(
- xfs_acl_t *aclp)
-{
- xfs_acl_entry_t *ace, *end;
-
- INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0]; ace < end; ace++) {
- INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm);
+ error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
}
-}
-/*
- * Get the ACL from the EA and do endian conversion.
- */
-STATIC void
-xfs_acl_get_attr(
- vnode_t *vp,
- xfs_acl_t *aclp,
- int kind,
- int flags,
- int *error)
-{
- int len = sizeof(xfs_acl_t);
-
- ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
- flags |= ATTR_ROOT;
- VOP_ATTR_GET(vp,
- kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT,
- (char *)aclp, &len, flags, sys_cred, *error);
- if (*error || (flags & ATTR_KERNOVAL))
- return;
- xfs_acl_get_endian(aclp);
+ return error;
}
-/*
- * Set the EA with the ACL and do endian conversion.
- */
-STATIC void
-xfs_acl_set_attr(
- vnode_t *vp,
- xfs_acl_t *aclp,
- int kind,
- int *error)
+static int
+xfs_acl_exists(struct inode *inode, unsigned char *name)
{
- xfs_acl_entry_t *ace, *newace, *end;
- xfs_acl_t *newacl;
- int len;
+ int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
- if (!(_ACL_ALLOC(newacl))) {
- *error = ENOMEM;
- return;
- }
-
- len = sizeof(xfs_acl_t) -
- (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt));
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0];
- ace < end;
- ace++, newace++) {
- INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
- }
- INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- VOP_ATTR_SET(vp,
- kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE: SGI_ACL_DEFAULT,
- (char *)newacl, len, ATTR_ROOT, sys_cred, *error);
- _ACL_FREE(newacl);
+ return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+ ATTR_ROOT|ATTR_KERNOVAL) == 0);
}
int
-xfs_acl_vtoacl(
- vnode_t *vp,
- xfs_acl_t *access_acl,
- xfs_acl_t *default_acl)
+posix_acl_access_exists(struct inode *inode)
{
- vattr_t va;
- int error = 0;
-
- if (access_acl) {
- /*
- * Get the Access ACL and the mode. If either cannot
- * be obtained for some reason, invalidate the access ACL.
- */
- xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
- if (!error) {
- /* Got the ACL, need the mode... */
- va.va_mask = XFS_AT_MODE;
- VOP_GETATTR(vp, &va, 0, sys_cred, error);
- }
-
- if (error)
- access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- else /* We have a good ACL and the file mode, synchronize. */
- xfs_acl_sync_mode(va.va_mode, access_acl);
- }
-
- if (default_acl) {
- xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error);
- if (error)
- default_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- }
- return error;
+ return xfs_acl_exists(inode, SGI_ACL_FILE);
}
-/*
- * This function retrieves the parent directory's acl, processes it
- * and lets the child inherit the acl(s) that it should.
- */
int
-xfs_acl_inherit(
- vnode_t *vp,
- vattr_t *vap,
- xfs_acl_t *pdaclp)
+posix_acl_default_exists(struct inode *inode)
{
- xfs_acl_t *cacl;
- int error = 0;
- int basicperms = 0;
-
- /*
- * If the parent does not have a default ACL, or it's an
- * invalid ACL, we're done.
- */
- if (!vp)
- return 0;
- if (!pdaclp || xfs_acl_invalid(pdaclp))
+ if (!S_ISDIR(inode->i_mode))
return 0;
-
- /*
- * Copy the default ACL of the containing directory to
- * the access ACL of the new file and use the mode that
- * was passed in to set up the correct initial values for
- * the u::,g::[m::], and o:: entries. This is what makes
- * umask() "work" with ACL's.
- */
-
- if (!(_ACL_ALLOC(cacl)))
- return ENOMEM;
-
- memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
- xfs_acl_filter_mode(vap->va_mode, cacl);
- xfs_acl_setmode(vp, cacl, &basicperms);
-
- /*
- * Set the Default and Access ACL on the file. The mode is already
- * set on the file, so we don't need to worry about that.
- *
- * If the new file is a directory, its default ACL is a copy of
- * the containing directory's default ACL.
- */
- if (VN_ISDIR(vp))
- xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
- if (!error && !basicperms)
- xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
- _ACL_FREE(cacl);
- return error;
+ return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
}
-/*
- * Set up the correct mode on the file based on the supplied ACL. This
- * makes sure that the mode on the file reflects the state of the
- * u::,g::[m::], and o:: entries in the ACL. Since the mode is where
- * the ACL is going to get the permissions for these entries, we must
- * synchronize the mode whenever we set the ACL on a file.
- */
-STATIC int
-xfs_acl_setmode(
- vnode_t *vp,
- xfs_acl_t *acl,
- int *basicperms)
+int
+xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- vattr_t va;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
- int i, error, nomask = 1;
+ int error = 0;
- *basicperms = 1;
-
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT)
- return 0;
+ if (!acl)
+ goto set_acl;
- /*
- * Copy the u::, g::, o::, and m:: bits from the ACL into the
- * mode. The m:: bits take precedence over the g:: bits.
- */
- va.va_mask = XFS_AT_MODE;
- VOP_GETATTR(vp, &va, 0, sys_cred, error);
- if (error)
+ error = -E2BIG;
+ if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
return error;
- va.va_mask = XFS_AT_MODE;
- va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
- ap = acl->acl_entry;
- for (i = 0; i < acl->acl_cnt; ++i) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- va.va_mode |= ap->ae_perm << 6;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK: /* more than just standard modes */
- nomask = 0;
- va.va_mode |= ap->ae_perm << 3;
- *basicperms = 0;
- break;
- case ACL_OTHER:
- va.va_mode |= ap->ae_perm;
- break;
- default: /* more than just standard modes */
- *basicperms = 0;
- break;
- }
- ap++;
- }
-
- /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- va.va_mode |= gap->ae_perm << 3;
+ if (type == ACL_TYPE_ACCESS) {
+ umode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
- VOP_SETATTR(vp, &va, 0, sys_cred, error);
- return error;
-}
+ if (error <= 0) {
+ acl = NULL;
-/*
- * The permissions for the special ACL entries (u::, g::[m::], o::) are
- * actually stored in the file mode (if there is both a group and a mask,
- * the group is stored in the ACL entry and the mask is stored on the file).
- * This allows the mode to remain automatically in sync with the ACL without
- * the need for a call-back to the ACL system at every point where the mode
- * could change. This function takes the permissions from the specified mode
- * and places it in the supplied ACL.
- *
- * This implementation draws its validity from the fact that, when the ACL
- * was assigned, the mode was copied from the ACL.
- * If the mode did not change, therefore, the mode remains exactly what was
- * taken from the special ACL entries at assignment.
- * If a subsequent chmod() was done, the POSIX spec says that the change in
- * mode must cause an update to the ACL seen at user level and used for
- * access checks. Before and after a mode change, therefore, the file mode
- * most accurately reflects what the special ACL entries should permit/deny.
- *
- * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly,
- * the existing mode bits will override whatever is in the
- * ACL. Similarly, if there is a pre-existing ACL that was
- * never in sync with its mode (owing to a bug in 6.5 and
- * before), it will now magically (or mystically) be
- * synchronized. This could cause slight astonishment, but
- * it is better than inconsistent permissions.
- *
- * The supplied ACL is a template that may contain any combination
- * of special entries. These are treated as place holders when we fill
- * out the ACL. This routine does not add or remove special entries, it
- * simply unites each special entry with its associated set of permissions.
- */
-STATIC void
-xfs_acl_sync_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
-
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be set instead of the GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm = (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm = (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm = mode & 0x7;
- break;
- default:
- break;
+ if (error < 0)
+ return error;
}
- }
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm = (mode >> 3) & 0x7;
-}
-
-/*
- * When inheriting an Access ACL from a directory Default ACL,
- * the ACL bits are set to the intersection of the ACL default
- * permission bits and the file permission bits in mode. If there
- * are no permission bits on the file then we must not give them
- * the ACL. This is what what makes umask() work with ACLs.
- */
-STATIC void
-xfs_acl_filter_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be merged with GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm &= (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm &= (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm &= mode & 0x7;
- break;
- default:
- break;
- }
+ error = xfs_set_mode(inode, mode);
+ if (error)
+ return error;
}
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm &= (mode >> 3) & 0x7;
+
+ set_acl:
+ return __xfs_set_acl(inode, type, acl);
}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index f9315bc960c..5dc16374451 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -18,85 +18,58 @@
#ifndef __XFS_ACL_H__
#define __XFS_ACL_H__
-/*
- * Access Control Lists
- */
-typedef __uint16_t xfs_acl_perm_t;
-typedef __int32_t xfs_acl_type_t;
-typedef __int32_t xfs_acl_tag_t;
-typedef __int32_t xfs_acl_id_t;
+struct inode;
+struct posix_acl;
+struct xfs_inode;
-#define XFS_ACL_MAX_ENTRIES 25
#define XFS_ACL_NOT_PRESENT (-1)
-typedef struct xfs_acl_entry {
- xfs_acl_tag_t ae_tag;
- xfs_acl_id_t ae_id;
- xfs_acl_perm_t ae_perm;
-} xfs_acl_entry_t;
+/* On-disk XFS access control list structure */
+struct xfs_acl_entry {
+ __be32 ae_tag;
+ __be32 ae_id;
+ __be16 ae_perm;
+ __be16 ae_pad; /* fill the implicit hole in the structure */
+};
-typedef struct xfs_acl {
- __int32_t acl_cnt;
- xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES];
-} xfs_acl_t;
+struct xfs_acl {
+ __be32 acl_cnt;
+ struct xfs_acl_entry acl_entry[0];
+};
+
+/*
+ * The number of ACL entries allowed is defined by the on-disk format.
+ * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is
+ * limited only by the maximum size of the xattr that stores the information.
+ */
+#define XFS_ACL_MAX_ENTRIES(mp) \
+ (xfs_sb_version_hascrc(&mp->m_sb) \
+ ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
+ sizeof(struct xfs_acl_entry) \
+ : 25)
+
+#define XFS_ACL_MAX_SIZE(mp) \
+ (sizeof(struct xfs_acl) + \
+ sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
/* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE "SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
-
#ifdef CONFIG_XFS_POSIX_ACL
-
-struct vattr;
-struct vnode;
-struct xfs_inode;
-
-extern struct kmem_zone *xfs_acl_zone;
-#define xfs_acl_zone_init(zone, name) \
- (zone) = kmem_zone_init(sizeof(xfs_acl_t), name)
-#define xfs_acl_zone_destroy(zone) kmem_cache_destroy(zone)
-
-extern int xfs_acl_inherit(struct vnode *, struct vattr *, xfs_acl_t *);
-extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(struct vnode *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(struct vnode *);
-extern int xfs_acl_vhasacl_default(struct vnode *);
-extern int xfs_acl_vset(struct vnode *, void *, size_t, int);
-extern int xfs_acl_vget(struct vnode *, void *, size_t, int);
-extern int xfs_acl_vremove(struct vnode *vp, int);
-
-#define _ACL_TYPE_ACCESS 1
-#define _ACL_TYPE_DEFAULT 2
-#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
-
-#define _ACL_INHERIT(c,v,d) (xfs_acl_inherit(c,v,d))
-#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0)
-#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0)
-#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access
-#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default
-#define _ACL_XFS_IACCESS(i,m,c) (XFS_IFORK_Q(i) ? xfs_acl_iaccess(i,m,c) : -1)
-
-#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
-#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
-
+extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
+extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int posix_acl_access_exists(struct inode *inode);
+extern int posix_acl_default_exists(struct inode *inode);
#else
-#define xfs_acl_zone_init(zone,name)
-#define xfs_acl_zone_destroy(zone)
-#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vremove(v,t) (-EOPNOTSUPP)
-#define xfs_acl_vhasacl_access(v) (0)
-#define xfs_acl_vhasacl_default(v) (0)
-#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */
-#define _ACL_FREE(a) ((void)0)
-#define _ACL_INHERIT(c,v,d) (0)
-#define _ACL_GET_ACCESS(pv,pa) (0)
-#define _ACL_GET_DEFAULT(pv,pd) (0)
-#define _ACL_ACCESS_EXISTS (NULL)
-#define _ACL_DEFAULT_EXISTS (NULL)
-#define _ACL_XFS_IACCESS(i,m,c) (-1)
-#endif
-
+static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
+{
+ return NULL;
+}
+# define xfs_set_acl NULL
+# define posix_acl_access_exists(inode) 0
+# define posix_acl_default_exists(inode) 0
+#endif /* CONFIG_XFS_POSIX_ACL */
#endif /* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a96e2ffce0c..6e247a99f5d 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -30,6 +30,7 @@ struct xfs_trans;
#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */
#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */
+#define XFS_AGFL_MAGIC 0x5841464c /* 'XAFL' */
#define XFS_AGF_VERSION 1
#define XFS_AGI_VERSION 1
@@ -63,13 +64,33 @@ typedef struct xfs_agf {
__be32 agf_spare0; /* spare field */
__be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
__be32 agf_spare1; /* spare field */
+
__be32 agf_flfirst; /* first freelist block's index */
__be32 agf_fllast; /* last freelist block's index */
__be32 agf_flcount; /* count of blocks in freelist */
__be32 agf_freeblks; /* total free blocks */
+
__be32 agf_longest; /* longest free space */
+ __be32 agf_btreeblks; /* # of blocks held in AGF btrees */
+ uuid_t agf_uuid; /* uuid of filesystem */
+
+ /*
+ * reserve some contiguous space for future logged fields before we add
+ * the unlogged fields. This makes the range logging via flags and
+ * structure offsets much simpler.
+ */
+ __be64 agf_spare64[16];
+
+ /* unlogged fields, written during buffer writeback. */
+ __be64 agf_lsn; /* last write sequence */
+ __be32 agf_crc; /* crc of agf sector */
+ __be32 agf_spare2;
+
+ /* structure must be padded to 64 bit alignment */
} xfs_agf_t;
+#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc)
+
#define XFS_AGF_MAGICNUM 0x00000001
#define XFS_AGF_VERSIONNUM 0x00000002
#define XFS_AGF_SEQNO 0x00000004
@@ -81,14 +102,33 @@ typedef struct xfs_agf {
#define XFS_AGF_FLCOUNT 0x00000100
#define XFS_AGF_FREEBLKS 0x00000200
#define XFS_AGF_LONGEST 0x00000400
-#define XFS_AGF_NUM_BITS 11
+#define XFS_AGF_BTREEBLKS 0x00000800
+#define XFS_AGF_UUID 0x00001000
+#define XFS_AGF_NUM_BITS 13
#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
+#define XFS_AGF_FLAGS \
+ { XFS_AGF_MAGICNUM, "MAGICNUM" }, \
+ { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \
+ { XFS_AGF_SEQNO, "SEQNO" }, \
+ { XFS_AGF_LENGTH, "LENGTH" }, \
+ { XFS_AGF_ROOTS, "ROOTS" }, \
+ { XFS_AGF_LEVELS, "LEVELS" }, \
+ { XFS_AGF_FLFIRST, "FLFIRST" }, \
+ { XFS_AGF_FLLAST, "FLLAST" }, \
+ { XFS_AGF_FLCOUNT, "FLCOUNT" }, \
+ { XFS_AGF_FREEBLKS, "FREEBLKS" }, \
+ { XFS_AGF_LONGEST, "LONGEST" }, \
+ { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \
+ { XFS_AGF_UUID, "UUID" }
+
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
-#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr))
+extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
/*
* Size of the unlinked inode hash table in the agi.
@@ -112,6 +152,7 @@ typedef struct xfs_agi {
__be32 agi_root; /* root of inode btree */
__be32 agi_level; /* levels in inode btree */
__be32 agi_freecount; /* number of free inodes */
+
__be32 agi_newino; /* new inode just allocated */
__be32 agi_dirino; /* last directory inode chunk */
/*
@@ -119,26 +160,46 @@ typedef struct xfs_agi {
* still being referenced.
*/
__be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
+ /*
+ * This marks the end of logging region 1 and start of logging region 2.
+ */
+ uuid_t agi_uuid; /* uuid of filesystem */
+ __be32 agi_crc; /* crc of agi sector */
+ __be32 agi_pad32;
+ __be64 agi_lsn; /* last write sequence */
+
+ __be32 agi_free_root; /* root of the free inode btree */
+ __be32 agi_free_level;/* levels in free inode btree */
+
+ /* structure must be padded to 64 bit alignment */
} xfs_agi_t;
-#define XFS_AGI_MAGICNUM 0x00000001
-#define XFS_AGI_VERSIONNUM 0x00000002
-#define XFS_AGI_SEQNO 0x00000004
-#define XFS_AGI_LENGTH 0x00000008
-#define XFS_AGI_COUNT 0x00000010
-#define XFS_AGI_ROOT 0x00000020
-#define XFS_AGI_LEVEL 0x00000040
-#define XFS_AGI_FREECOUNT 0x00000080
-#define XFS_AGI_NEWINO 0x00000100
-#define XFS_AGI_DIRINO 0x00000200
-#define XFS_AGI_UNLINKED 0x00000400
-#define XFS_AGI_NUM_BITS 11
-#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1)
+#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
+
+#define XFS_AGI_MAGICNUM (1 << 0)
+#define XFS_AGI_VERSIONNUM (1 << 1)
+#define XFS_AGI_SEQNO (1 << 2)
+#define XFS_AGI_LENGTH (1 << 3)
+#define XFS_AGI_COUNT (1 << 4)
+#define XFS_AGI_ROOT (1 << 5)
+#define XFS_AGI_LEVEL (1 << 6)
+#define XFS_AGI_FREECOUNT (1 << 7)
+#define XFS_AGI_NEWINO (1 << 8)
+#define XFS_AGI_DIRINO (1 << 9)
+#define XFS_AGI_UNLINKED (1 << 10)
+#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */
+#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
+#define XFS_AGI_FREE_ROOT (1 << 11)
+#define XFS_AGI_FREE_LEVEL (1 << 12)
+#define XFS_AGI_NUM_BITS_R2 13
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
-#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr))
+
+extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, struct xfs_buf **bpp);
/*
* The third a.g. block contains the a.g. freelist, an array
@@ -146,53 +207,42 @@ typedef struct xfs_agi {
*/
#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
-#define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
-#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr))
-typedef struct xfs_agfl {
- xfs_agblock_t agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */
-} xfs_agfl_t;
+#define XFS_BUF_TO_AGFL_BNO(mp, bp) \
+ (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+ &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
+ (__be32 *)(bp)->b_addr)
/*
- * Busy block/extent entry. Used in perag to mark blocks that have been freed
- * but whose transactions aren't committed to disk yet.
+ * Size of the AGFL. For CRC-enabled filesystems we steal a couple of
+ * slots in the beginning of the block for a proper header with the
+ * location information and CRC.
*/
-typedef struct xfs_perag_busy {
- xfs_agblock_t busy_start;
- xfs_extlen_t busy_length;
- struct xfs_trans *busy_tp; /* transaction that did the free */
-} xfs_perag_busy_t;
+#define XFS_AGFL_SIZE(mp) \
+ (((mp)->m_sb.sb_sectsize - \
+ (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+ sizeof(struct xfs_agfl) : 0)) / \
+ sizeof(xfs_agblock_t))
+
+typedef struct xfs_agfl {
+ __be32 agfl_magicnum;
+ __be32 agfl_seqno;
+ uuid_t agfl_uuid;
+ __be64 agfl_lsn;
+ __be32 agfl_crc;
+ __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
+} xfs_agfl_t;
+
+#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
/*
- * Per-ag incore structure, copies of information in agf and agi,
- * to improve the performance of allocation group selection.
- *
- * pick sizes which fit in allocation buckets well
+ * tags for inode radix tree
*/
-#if (BITS_PER_LONG == 32)
-#define XFS_PAGB_NUM_SLOTS 84
-#elif (BITS_PER_LONG == 64)
-#define XFS_PAGB_NUM_SLOTS 128
-#endif
-
-typedef struct xfs_perag
-{
- char pagf_init; /* this agf's entry is initialized */
- char pagi_init; /* this agi's entry is initialized */
- char pagf_metadata; /* the agf is prefered to be metadata */
- char pagi_inodeok; /* The agi is ok for inodes */
- __uint8_t pagf_levels[XFS_BTNUM_AGF];
- /* # of levels in bno & cnt btree */
- __uint32_t pagf_flcount; /* count of blocks in freelist */
- xfs_extlen_t pagf_freeblks; /* total free blocks */
- xfs_extlen_t pagf_longest; /* longest free space */
- xfs_agino_t pagi_freecount; /* number of free inodes */
-#ifdef __KERNEL__
- lock_t pagb_lock; /* lock for pagb_list */
-#endif
- int pagb_count; /* pagb slots in use */
- xfs_perag_busy_t *pagb_list; /* unstable blocks */
-} xfs_perag_t;
+#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
+ in xfs_inode_ag_iterator */
+#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
+#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
@@ -203,15 +253,15 @@ typedef struct xfs_perag
be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
#define XFS_MIN_FREELIST_PAG(pag,mp) \
(XFS_MIN_FREELIST_RAW( \
- (uint_t)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
- (uint_t)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
+ (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
+ (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
#define XFS_AGB_TO_FSB(mp,agno,agbno) \
(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
#define XFS_FSB_TO_AGNO(mp,fsbno) \
((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog))
#define XFS_FSB_TO_AGBNO(mp,fsbno) \
- ((xfs_agblock_t)((fsbno) & XFS_MASK32LO((mp)->m_sb.sb_agblklog)))
+ ((xfs_agblock_t)((fsbno) & xfs_mask32lo((mp)->m_sb.sb_agblklog)))
#define XFS_AGB_TO_DADDR(mp,agno,agbno) \
((xfs_daddr_t)XFS_FSB_TO_BB(mp, \
(xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno)))
@@ -224,8 +274,8 @@ typedef struct xfs_perag
#define XFS_AG_CHECK_DADDR(mp,d,len) \
((len) == 1 ? \
ASSERT((d) == XFS_SB_DADDR || \
- XFS_DADDR_TO_AGBNO(mp, d) != XFS_SB_DADDR) : \
- ASSERT(XFS_DADDR_TO_AGNO(mp, d) == \
- XFS_DADDR_TO_AGNO(mp, (d) + (len) - 1)))
+ xfs_daddr_to_agbno(mp, d) != XFS_SB_DADDR) : \
+ ASSERT(xfs_daddr_to_agno(mp, d) == \
+ xfs_daddr_to_agno(mp, (d) + (len) - 1)))
#endif /* __XFS_AG_H__ */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f4328e1e2a7..d43813267a8 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -17,108 +17,153 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_shared.h"
+#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_ialloc.h"
+#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
#include "xfs_error.h"
+#include "xfs_cksum.h"
+#include "xfs_trace.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_log.h"
+struct workqueue_struct *xfs_alloc_wq;
#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
-STATIC int
-xfs_alloc_search_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len);
-
-#if defined(XFS_ALLOC_TRACE)
-ktrace_t *xfs_alloc_trace_buf;
-
-#define TRACE_ALLOC(s,a) \
- xfs_alloc_trace_alloc(fname, s, a, __LINE__)
-#define TRACE_FREE(s,a,b,x,f) \
- xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__)
-#define TRACE_MODAGF(s,a,f) \
- xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__)
-#define TRACE_BUSY(fname,s,ag,agb,l,sl,tp) \
- xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
-#define TRACE_UNBUSY(fname,s,ag,sl,tp) \
- xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
-#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) \
- xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
-#else
-#define TRACE_ALLOC(s,a)
-#define TRACE_FREE(s,a,b,x,f)
-#define TRACE_MODAGF(s,a,f)
-#define TRACE_BUSY(s,a,ag,agb,l,sl,tp)
-#define TRACE_UNBUSY(fname,s,ag,sl,tp)
-#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)
-#endif /* XFS_ALLOC_TRACE */
-
-/*
- * Prototypes for per-ag allocation routines
- */
-
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
- xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+ xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_eq(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int /* error */
+xfs_alloc_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
/*
- * Internal functions.
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
*/
+int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_alloc_update(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len) /* length of extent */
+{
+ union xfs_btree_rec rec;
+
+ rec.alloc.ar_startblock = cpu_to_be32(bno);
+ rec.alloc.ar_blockcount = cpu_to_be32(len);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int /* error */
+xfs_alloc_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t *bno, /* output: starting block of extent */
+ xfs_extlen_t *len, /* output: length of extent */
+ int *stat) /* output: success/failure */
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (!error && *stat == 1) {
+ *bno = be32_to_cpu(rec->alloc.ar_startblock);
+ *len = be32_to_cpu(rec->alloc.ar_blockcount);
+ }
+ return error;
+}
/*
* Compute aligned version of the found extent.
* Takes alignment and min length into account.
*/
-STATIC int /* success (>= minlen) */
+STATIC void
xfs_alloc_compute_aligned(
+ xfs_alloc_arg_t *args, /* allocation argument structure */
xfs_agblock_t foundbno, /* starting block in found extent */
xfs_extlen_t foundlen, /* length in found extent */
- xfs_extlen_t alignment, /* alignment for allocation */
- xfs_extlen_t minlen, /* minimum length for allocation */
xfs_agblock_t *resbno, /* result block number */
xfs_extlen_t *reslen) /* result length */
{
xfs_agblock_t bno;
- xfs_extlen_t diff;
xfs_extlen_t len;
- if (alignment > 1 && foundlen >= minlen) {
- bno = roundup(foundbno, alignment);
- diff = bno - foundbno;
- len = diff >= foundlen ? 0 : foundlen - diff;
+ /* Trim busy sections out of found extent */
+ xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+
+ if (args->alignment > 1 && len >= args->minlen) {
+ xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
+ xfs_extlen_t diff = aligned_bno - bno;
+
+ *resbno = aligned_bno;
+ *reslen = diff >= len ? 0 : len - diff;
} else {
- bno = foundbno;
- len = foundlen;
+ *resbno = bno;
+ *reslen = len;
}
- *resbno = bno;
- *reslen = len;
- return len >= minlen;
}
/*
@@ -130,6 +175,7 @@ xfs_alloc_compute_diff(
xfs_agblock_t wantbno, /* target starting block */
xfs_extlen_t wantlen, /* target length */
xfs_extlen_t alignment, /* target alignment */
+ char userdata, /* are we allocating data? */
xfs_agblock_t freebno, /* freespace's starting block */
xfs_extlen_t freelen, /* freespace's length */
xfs_agblock_t *newbnop) /* result: best start block from free */
@@ -144,7 +190,14 @@ xfs_alloc_compute_diff(
ASSERT(freelen >= wantlen);
freeend = freebno + freelen;
wantend = wantbno + wantlen;
- if (freebno >= wantbno) {
+ /*
+ * We want to allocate from the start of a free extent if it is past
+ * the desired block or if we are allocating user data and the free
+ * extent is before desired block. The second case is there to allow
+ * for contiguous allocation from the remaining free space if the file
+ * grows in the short term.
+ */
+ if (freebno >= wantbno || (userdata && freeend < wantend)) {
if ((newbno1 = roundup(freebno, alignment)) >= freeend)
newbno1 = NULLAGBLOCK;
} else if (freeend >= wantend && alignment > 1) {
@@ -204,16 +257,14 @@ xfs_alloc_fix_len(
k = rlen % args->prod;
if (k == args->mod)
return;
- if (k > args->mod) {
- if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
- return;
- } else {
- if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
- (int)args->minlen)
- return;
- }
- ASSERT(rlen >= args->minlen);
- ASSERT(rlen <= args->maxlen);
+ if (k > args->mod)
+ rlen = rlen - (k - args->mod);
+ else
+ rlen = rlen - args->prod + (args->mod - k);
+ if ((int)rlen < (int)args->minlen)
+ return;
+ ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
+ ASSERT(rlen % args->prod == args->mod);
args->len = rlen;
}
@@ -232,7 +283,6 @@ xfs_alloc_fix_minleft(
return 1;
agf = XFS_BUF_TO_AGF(args->agbp);
diff = be32_to_cpu(agf->agf_freeblks)
- + be32_to_cpu(agf->agf_flcount)
- args->len - args->minleft;
if (diff >= 0)
return 1;
@@ -297,21 +347,20 @@ xfs_alloc_fixup_trees(
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
+
#ifdef DEBUG
- {
- xfs_alloc_block_t *bnoblock;
- xfs_alloc_block_t *cntblock;
-
- if (bno_cur->bc_nlevels == 1 &&
- cnt_cur->bc_nlevels == 1) {
- bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
- cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
- XFS_WANT_CORRUPTED_RETURN(
- be16_to_cpu(bnoblock->bb_numrecs) ==
- be16_to_cpu(cntblock->bb_numrecs));
- }
+ if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
+ struct xfs_btree_block *bnoblock;
+ struct xfs_btree_block *cntblock;
+
+ bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
+ cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+
+ XFS_WANT_CORRUPTED_RETURN(
+ bnoblock->bb_numrecs == cntblock->bb_numrecs);
}
#endif
+
/*
* Deal with all four cases: the allocated record is contained
* within the freespace record, so we can have new freespace
@@ -336,7 +385,7 @@ xfs_alloc_fixup_trees(
/*
* Delete the entry from the by-size btree.
*/
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
/*
@@ -346,7 +395,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -354,7 +403,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -365,7 +414,7 @@ xfs_alloc_fixup_trees(
/*
* No remaining freespace, just delete the by-block tree entry.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
} else {
@@ -382,13 +431,94 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
return 0;
}
+static bool
+xfs_agfl_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
+ int i;
+
+ if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
+ return false;
+ /*
+ * during growfs operations, the perag is not fully initialised,
+ * so we can't use it for any useful checking. growfs ensures we can't
+ * use it by using uncached buffers that don't have the perag attached
+ * so we can detect and avoid this problem.
+ */
+ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
+ return false;
+
+ for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
+ if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
+ be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
+ return false;
+ }
+ return true;
+}
+
+static void
+xfs_agfl_read_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ /*
+ * There is no verification of non-crc AGFLs because mkfs does not
+ * initialise the AGFL to zero or NULL. Hence the only valid part of the
+ * AGFL is what the AGF says is active. We can't get to the AGF, so we
+ * can't verify just those entries are valid.
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
+ if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_agfl_verify(bp))
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
+}
+
+static void
+xfs_agfl_write_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ /* no verification of non-crc AGFLs */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
+ if (!xfs_agfl_verify(bp)) {
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
+ }
+
+ if (bip)
+ XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+ xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_agfl_buf_ops = {
+ .verify_read = xfs_agfl_read_verify,
+ .verify_write = xfs_agfl_write_verify,
+};
+
/*
* Read in the allocation group free block array.
*/
@@ -406,133 +536,34 @@ xfs_alloc_read_agfl(
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0, &bp);
+ XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
if (error)
return error;
- ASSERT(bp);
- ASSERT(!XFS_BUF_GETERROR(bp));
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
+ xfs_buf_set_ref(bp, XFS_AGFL_REF);
*bpp = bp;
return 0;
}
-#if defined(XFS_ALLOC_TRACE)
-/*
- * Add an allocation trace entry for an alloc call.
- */
-STATIC void
-xfs_alloc_trace_alloc(
- char *name, /* function tag string */
- char *str, /* additional string */
- xfs_alloc_arg_t *args, /* allocation argument structure */
- int line) /* source line number */
+STATIC int
+xfs_alloc_update_counters(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agbp,
+ long len)
{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)args->mp,
- (void *)(__psunsigned_t)args->agno,
- (void *)(__psunsigned_t)args->agbno,
- (void *)(__psunsigned_t)args->minlen,
- (void *)(__psunsigned_t)args->maxlen,
- (void *)(__psunsigned_t)args->mod,
- (void *)(__psunsigned_t)args->prod,
- (void *)(__psunsigned_t)args->minleft,
- (void *)(__psunsigned_t)args->total,
- (void *)(__psunsigned_t)args->alignment,
- (void *)(__psunsigned_t)args->len,
- (void *)((((__psint_t)args->type) << 16) |
- (__psint_t)args->otype),
- (void *)(__psint_t)((args->wasdel << 3) |
- (args->wasfromfl << 2) |
- (args->isfl << 1) |
- (args->userdata << 0)));
-}
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
-/*
- * Add an allocation trace entry for a free call.
- */
-STATIC void
-xfs_alloc_trace_free(
- char *name, /* function tag string */
- char *str, /* additional string */
- xfs_mount_t *mp, /* file system mount point */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* a.g. relative block number */
- xfs_extlen_t len, /* length of extent */
- int isfl, /* set if is freelist allocation/free */
- int line) /* source line number */
-{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)mp,
- (void *)(__psunsigned_t)agno,
- (void *)(__psunsigned_t)agbno,
- (void *)(__psunsigned_t)len,
- (void *)(__psint_t)isfl,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-}
+ pag->pagf_freeblks += len;
+ be32_add_cpu(&agf->agf_freeblks, len);
-/*
- * Add an allocation trace entry for modifying an agf.
- */
-STATIC void
-xfs_alloc_trace_modagf(
- char *name, /* function tag string */
- char *str, /* additional string */
- xfs_mount_t *mp, /* file system mount point */
- xfs_agf_t *agf, /* new agf value */
- int flags, /* logging flags for agf */
- int line) /* source line number */
-{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)mp,
- (void *)(__psint_t)flags,
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
-}
+ xfs_trans_agblocks_delta(tp, len);
+ if (unlikely(be32_to_cpu(agf->agf_freeblks) >
+ be32_to_cpu(agf->agf_length)))
+ return EFSCORRUPTED;
-STATIC void
-xfs_alloc_trace_busy(
- char *name, /* function tag string */
- char *str, /* additional string */
- xfs_mount_t *mp, /* file system mount poing */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* a.g. relative block number */
- xfs_extlen_t len, /* length of extent */
- int slot, /* perag Busy slot */
- xfs_trans_t *tp,
- int trtype, /* type: add, delete, search */
- int line) /* source line number */
-{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(trtype | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)mp,
- (void *)(__psunsigned_t)agno,
- (void *)(__psunsigned_t)agbno,
- (void *)(__psunsigned_t)len,
- (void *)(__psint_t)slot,
- (void *)tp,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+ xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
+ return 0;
}
-#endif /* XFS_ALLOC_TRACE */
/*
* Allocation group level functions.
@@ -551,9 +582,6 @@ xfs_alloc_ag_vextent(
xfs_alloc_arg_t *args) /* argument structure for allocation */
{
int error=0;
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_ag_vextent";
-#endif
ASSERT(args->minlen > 0);
ASSERT(args->maxlen > 0);
@@ -578,46 +606,36 @@ xfs_alloc_ag_vextent(
ASSERT(0);
/* NOTREACHED */
}
- if (error)
+
+ if (error || args->agbno == NULLAGBLOCK)
return error;
- /*
- * If the allocation worked, need to change the agf structure
- * (and log it), and the superblock.
- */
- if (args->agbno != NULLAGBLOCK) {
- xfs_agf_t *agf; /* allocation group freelist header */
-#ifdef XFS_ALLOC_TRACE
- xfs_mount_t *mp = args->mp;
-#endif
- long slen = (long)args->len;
- ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
- ASSERT(!(args->wasfromfl) || !args->isfl);
- ASSERT(args->agbno % args->alignment == 0);
- if (!(args->wasfromfl)) {
-
- agf = XFS_BUF_TO_AGF(args->agbp);
- be32_add(&agf->agf_freeblks, -(args->len));
- xfs_trans_agblocks_delta(args->tp,
- -((long)(args->len)));
- args->pag->pagf_freeblks -= args->len;
- ASSERT(be32_to_cpu(agf->agf_freeblks) <=
- be32_to_cpu(agf->agf_length));
- TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
- xfs_alloc_log_agf(args->tp, args->agbp,
- XFS_AGF_FREEBLKS);
- /* search the busylist for these blocks */
- xfs_alloc_search_busy(args->tp, args->agno,
- args->agbno, args->len);
- }
- if (!args->isfl)
- xfs_trans_mod_sb(args->tp,
- args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
- XFS_TRANS_SB_FDBLOCKS, -slen);
- XFS_STATS_INC(xs_allocx);
- XFS_STATS_ADD(xs_allocb, args->len);
+ ASSERT(args->len >= args->minlen);
+ ASSERT(args->len <= args->maxlen);
+ ASSERT(!args->wasfromfl || !args->isfl);
+ ASSERT(args->agbno % args->alignment == 0);
+
+ if (!args->wasfromfl) {
+ error = xfs_alloc_update_counters(args->tp, args->pag,
+ args->agbp,
+ -((long)(args->len)));
+ if (error)
+ return error;
+
+ ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
+ args->agbno, args->len));
}
- return 0;
+
+ if (!args->isfl) {
+ xfs_trans_mod_sb(args->tp, args->wasdel ?
+ XFS_TRANS_SB_RES_FDBLOCKS :
+ XFS_TRANS_SB_FDBLOCKS,
+ -((long)(args->len)));
+ }
+
+ XFS_STATS_INC(xs_allocx);
+ XFS_STATS_ADD(xs_allocb, args->len);
+ return error;
}
/*
@@ -632,97 +650,194 @@ xfs_alloc_ag_vextent_exact(
{
xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
- xfs_agblock_t end; /* end of allocated extent */
int error;
xfs_agblock_t fbno; /* start block of found extent */
- xfs_agblock_t fend; /* end block of found extent */
xfs_extlen_t flen; /* length of found extent */
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_ag_vextent_exact";
-#endif
+ xfs_agblock_t tbno; /* start block of trimmed extent */
+ xfs_extlen_t tlen; /* length of trimmed extent */
+ xfs_agblock_t tend; /* end block of trimmed extent */
int i; /* success/failure of operation */
- xfs_agblock_t maxend; /* end of maximal extent */
- xfs_agblock_t minend; /* end of minimal extent */
- xfs_extlen_t rlen; /* length of returned extent */
ASSERT(args->alignment == 1);
+
/*
* Allocate/initialize a cursor for the by-number freespace btree.
*/
- bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
+
/*
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
* Look for the closest free block <= bno, it must contain bno
* if any free block does.
*/
- if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
+ error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
+ if (error)
goto error0;
- if (!i) {
- /*
- * Didn't find it, return null.
- */
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- args->agbno = NULLAGBLOCK;
- return 0;
- }
+ if (!i)
+ goto not_found;
+
/*
* Grab the freespace record.
*/
- if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
+ error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
+ if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
ASSERT(fbno <= args->agbno);
- minend = args->agbno + args->minlen;
- maxend = args->agbno + args->maxlen;
- fend = fbno + flen;
+
/*
- * Give up if the freespace isn't long enough for the minimum request.
+ * Check for overlapping busy extents.
*/
- if (fend < minend) {
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- args->agbno = NULLAGBLOCK;
- return 0;
- }
+ xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+
/*
- * End of extent will be smaller of the freespace end and the
- * maximal requested end.
+ * Give up if the start of the extent is busy, or the freespace isn't
+ * long enough for the minimum request.
*/
- end = XFS_AGBLOCK_MIN(fend, maxend);
+ if (tbno > args->agbno)
+ goto not_found;
+ if (tlen < args->minlen)
+ goto not_found;
+ tend = tbno + tlen;
+ if (tend < args->agbno + args->minlen)
+ goto not_found;
+
/*
+ * End of extent will be smaller of the freespace end and the
+ * maximal requested end.
+ *
* Fix the length according to mod and prod if given.
*/
- args->len = end - args->agbno;
+ args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
+ - args->agbno;
xfs_alloc_fix_len(args);
- if (!xfs_alloc_fix_minleft(args)) {
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- return 0;
- }
- rlen = args->len;
- ASSERT(args->agbno + rlen <= fend);
- end = args->agbno + rlen;
+ if (!xfs_alloc_fix_minleft(args))
+ goto not_found;
+
+ ASSERT(args->agbno + args->len <= tend);
+
/*
- * We are allocating agbno for rlen [agbno .. end]
+ * We are allocating agbno for args->len
* Allocate/initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
ASSERT(args->agbno + args->len <=
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
- if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
- args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
+ error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
+ args->len, XFSA_FIXUP_BNO_OK);
+ if (error) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
goto error0;
}
+
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("normal", args);
+
args->wasfromfl = 0;
+ trace_xfs_alloc_exact_done(args);
+ return 0;
+
+not_found:
+ /* Didn't find it, return null. */
+ xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+ args->agbno = NULLAGBLOCK;
+ trace_xfs_alloc_exact_notfound(args);
return 0;
error0:
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_exact_error(args);
+ return error;
+}
+
+/*
+ * Search the btree in a given direction via the search cursor and compare
+ * the records found against the good extent we've already found.
+ */
+STATIC int
+xfs_alloc_find_best_extent(
+ struct xfs_alloc_arg *args, /* allocation argument structure */
+ struct xfs_btree_cur **gcur, /* good cursor */
+ struct xfs_btree_cur **scur, /* searching cursor */
+ xfs_agblock_t gdiff, /* difference for search comparison */
+ xfs_agblock_t *sbno, /* extent found by search */
+ xfs_extlen_t *slen, /* extent length */
+ xfs_agblock_t *sbnoa, /* aligned extent found by search */
+ xfs_extlen_t *slena, /* aligned extent length */
+ int dir) /* 0 = search right, 1 = search left */
+{
+ xfs_agblock_t new;
+ xfs_agblock_t sdiff;
+ int error;
+ int i;
+
+ /* The good extent is perfect, no need to search. */
+ if (!gdiff)
+ goto out_use_good;
+
+ /*
+ * Look until we find a better one, run out of space or run off the end.
+ */
+ do {
+ error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+ xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
+
+ /*
+ * The good extent is closer than this one.
+ */
+ if (!dir) {
+ if (*sbnoa >= args->agbno + gdiff)
+ goto out_use_good;
+ } else {
+ if (*sbnoa <= args->agbno - gdiff)
+ goto out_use_good;
+ }
+
+ /*
+ * Same distance, compare length and pick the best.
+ */
+ if (*slena >= args->minlen) {
+ args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
+ xfs_alloc_fix_len(args);
+
+ sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+ args->alignment,
+ args->userdata, *sbnoa,
+ *slena, &new);
+
+ /*
+ * Choose closer size and invalidate other cursor.
+ */
+ if (sdiff < gdiff)
+ goto out_use_search;
+ goto out_use_good;
+ }
+
+ if (!dir)
+ error = xfs_btree_increment(*scur, 0, &i);
+ else
+ error = xfs_btree_decrement(*scur, 0, &i);
+ if (error)
+ goto error0;
+ } while (i);
+
+out_use_good:
+ xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
+ *scur = NULL;
+ return 0;
+
+out_use_search:
+ xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
+ *gcur = NULL;
+ return 0;
+
+error0:
+ /* caller invalidates cursors */
return error;
}
@@ -739,9 +854,6 @@ xfs_alloc_ag_vextent_near(
xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_ag_vextent_near";
-#endif
xfs_agblock_t gtbno; /* start bno of right side entry */
xfs_agblock_t gtbnoa; /* aligned ... */
xfs_extlen_t gtdiff; /* difference to right side entry */
@@ -754,27 +866,33 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t ltbno; /* start bno of left side entry */
xfs_agblock_t ltbnoa; /* aligned ... */
xfs_extlen_t ltdiff; /* difference to left side entry */
- /*REFERENCED*/
- xfs_agblock_t ltend; /* end bno of left side entry */
xfs_extlen_t ltlen; /* length of left side entry */
xfs_extlen_t ltlena; /* aligned ... */
xfs_agblock_t ltnew; /* useful start bno of left side */
xfs_extlen_t rlen; /* length of returned extent */
-#if defined(DEBUG) && defined(__KERNEL__)
+ int forced = 0;
+#ifdef DEBUG
/*
* Randomly don't execute the first algorithm.
*/
int dofirst; /* set to do first algorithm */
- dofirst = random() & 1;
+ dofirst = prandom_u32() & 1;
#endif
+
+restart:
+ bno_cur_lt = NULL;
+ bno_cur_gt = NULL;
+ ltlen = 0;
+ gtlena = 0;
+ ltlena = 0;
+
/*
* Get a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
- ltlen = 0;
- bno_cur_lt = bno_cur_gt = NULL;
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
+
/*
* See if there are any free extents as big as maxlen.
*/
@@ -790,11 +908,13 @@ xfs_alloc_ag_vextent_near(
goto error0;
if (i == 0 || ltlen == 0) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ trace_xfs_alloc_near_noentry(args);
return 0;
}
ASSERT(i == 1);
}
args->wasfromfl = 0;
+
/*
* First algorithm.
* If the requested extent is large wrt the freespaces available
@@ -811,8 +931,8 @@ xfs_alloc_ag_vextent_near(
xfs_extlen_t blen=0;
xfs_agblock_t bnew=0;
-#if defined(DEBUG) && defined(__KERNEL__)
- if (!dofirst)
+#ifdef DEBUG
+ if (dofirst)
break;
#endif
/*
@@ -830,7 +950,7 @@ xfs_alloc_ag_vextent_near(
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (ltlen >= args->minlen)
break;
- if ((error = xfs_alloc_increment(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
goto error0;
} while (i);
ASSERT(ltlen >= args->minlen);
@@ -840,7 +960,7 @@ xfs_alloc_ag_vextent_near(
i = cnt_cur->bc_ptrs[0];
for (j = 1, blen = 0, bdiff = 0;
!error && j && (blen < args->maxlen || bdiff > 0);
- error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+ error = xfs_btree_increment(cnt_cur, 0, &j)) {
/*
* For each entry, decide if it's better than
* the previous best entry.
@@ -848,9 +968,9 @@ xfs_alloc_ag_vextent_near(
if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if (!xfs_alloc_compute_aligned(ltbno, ltlen,
- args->alignment, args->minlen,
- &ltbnoa, &ltlena))
+ xfs_alloc_compute_aligned(args, ltbno, ltlen,
+ &ltbnoa, &ltlena);
+ if (ltlena < args->minlen)
continue;
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
@@ -858,7 +978,8 @@ xfs_alloc_ag_vextent_near(
if (args->len < blen)
continue;
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, ltbno, ltlen, &ltnew);
+ args->alignment, args->userdata, ltbnoa,
+ ltlena, &ltnew);
if (ltnew != NULLAGBLOCK &&
(args->len > blen || ltdiff < bdiff)) {
bdiff = ltdiff;
@@ -880,12 +1001,11 @@ xfs_alloc_ag_vextent_near(
if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- ltend = ltbno + ltlen;
- ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+ ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
args->len = blen;
if (!xfs_alloc_fix_minleft(args)) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("nominleft", args);
+ trace_xfs_alloc_near_nominleft(args);
return 0;
}
blen = args->len;
@@ -894,12 +1014,12 @@ xfs_alloc_ag_vextent_near(
*/
args->agbno = bnew;
ASSERT(bnew >= ltbno);
- ASSERT(bnew + blen <= ltend);
+ ASSERT(bnew + blen <= ltbno + ltlen);
/*
* Set up a cursor for the by-bno tree.
*/
- bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
- args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
+ args->agbp, args->agno, XFS_BTNUM_BNO);
/*
* Fix up the btree entries.
*/
@@ -908,7 +1028,8 @@ xfs_alloc_ag_vextent_near(
goto error0;
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
- TRACE_ALLOC("first", args);
+
+ trace_xfs_alloc_near_first(args);
return 0;
}
/*
@@ -926,8 +1047,8 @@ xfs_alloc_ag_vextent_near(
/*
* Allocate and initialize the cursor for the leftward search.
*/
- bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
/*
* Lookup <= bno to find the leftward search's starting point.
*/
@@ -950,7 +1071,7 @@ xfs_alloc_ag_vextent_near(
* Increment the cursor, so we will point at the entry just right
* of the leftward entry if any, or to the leftmost entry.
*/
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
/*
@@ -969,11 +1090,11 @@ xfs_alloc_ag_vextent_near(
if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if (xfs_alloc_compute_aligned(ltbno, ltlen,
- args->alignment, args->minlen,
- &ltbnoa, &ltlena))
+ xfs_alloc_compute_aligned(args, ltbno, ltlen,
+ &ltbnoa, &ltlena);
+ if (ltlena >= args->minlen)
break;
- if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_lt,
@@ -985,11 +1106,11 @@ xfs_alloc_ag_vextent_near(
if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if (xfs_alloc_compute_aligned(gtbno, gtlen,
- args->alignment, args->minlen,
- &gtbnoa, &gtlena))
+ xfs_alloc_compute_aligned(args, gtbno, gtlen,
+ &gtbnoa, &gtlena);
+ if (gtlena >= args->minlen)
break;
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_gt,
@@ -998,211 +1119,65 @@ xfs_alloc_ag_vextent_near(
}
}
} while (bno_cur_lt || bno_cur_gt);
+
/*
* Got both cursors still active, need to find better entry.
*/
if (bno_cur_lt && bno_cur_gt) {
- /*
- * Left side is long enough, look for a right side entry.
- */
if (ltlena >= args->minlen) {
/*
- * Fix up the length.
+ * Left side is good, look for a right side entry.
*/
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
- rlen = args->len;
- ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
- args->alignment, ltbno, ltlen, &ltnew);
- /*
- * Not perfect.
- */
- if (ltdiff) {
- /*
- * Look until we find a better one, run out of
- * space, or run off the end.
- */
- while (bno_cur_lt && bno_cur_gt) {
- if ((error = xfs_alloc_get_rec(
- bno_cur_gt, &gtbno,
- &gtlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_alloc_compute_aligned(gtbno, gtlen,
- args->alignment, args->minlen,
- &gtbnoa, &gtlena);
- /*
- * The left one is clearly better.
- */
- if (gtbnoa >= args->agbno + ltdiff) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- break;
- }
- /*
- * If we reach a big enough entry,
- * compare the two and pick the best.
- */
- if (gtlena >= args->minlen) {
- args->len =
- XFS_EXTLEN_MIN(gtlena,
- args->maxlen);
- xfs_alloc_fix_len(args);
- rlen = args->len;
- gtdiff = xfs_alloc_compute_diff(
- args->agbno, rlen,
- args->alignment,
- gtbno, gtlen, &gtnew);
- /*
- * Right side is better.
- */
- if (gtdiff < ltdiff) {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
- /*
- * Left side is better.
- */
- else {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- break;
- }
- /*
- * Fell off the right end.
- */
- if ((error = xfs_alloc_increment(
- bno_cur_gt, 0, &i)))
- goto error0;
- if (!i) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- break;
- }
- }
- }
- /*
- * The left side is perfect, trash the right side.
- */
- else {
- xfs_btree_del_cursor(bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- }
- /*
- * It's the right side that was found first, look left.
- */
- else {
+ ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+ args->alignment, args->userdata, ltbnoa,
+ ltlena, &ltnew);
+
+ error = xfs_alloc_find_best_extent(args,
+ &bno_cur_lt, &bno_cur_gt,
+ ltdiff, &gtbno, &gtlen,
+ &gtbnoa, &gtlena,
+ 0 /* search right */);
+ } else {
+ ASSERT(gtlena >= args->minlen);
+
/*
- * Fix up the length.
+ * Right side is good, look for a left side entry.
*/
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
xfs_alloc_fix_len(args);
- rlen = args->len;
- gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
- args->alignment, gtbno, gtlen, &gtnew);
- /*
- * Right side entry isn't perfect.
- */
- if (gtdiff) {
- /*
- * Look until we find a better one, run out of
- * space, or run off the end.
- */
- while (bno_cur_lt && bno_cur_gt) {
- if ((error = xfs_alloc_get_rec(
- bno_cur_lt, &ltbno,
- &ltlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_alloc_compute_aligned(ltbno, ltlen,
- args->alignment, args->minlen,
- &ltbnoa, &ltlena);
- /*
- * The right one is clearly better.
- */
- if (ltbnoa <= args->agbno - gtdiff) {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- break;
- }
- /*
- * If we reach a big enough entry,
- * compare the two and pick the best.
- */
- if (ltlena >= args->minlen) {
- args->len = XFS_EXTLEN_MIN(
- ltlena, args->maxlen);
- xfs_alloc_fix_len(args);
- rlen = args->len;
- ltdiff = xfs_alloc_compute_diff(
- args->agbno, rlen,
- args->alignment,
- ltbno, ltlen, &ltnew);
- /*
- * Left side is better.
- */
- if (ltdiff < gtdiff) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- /*
- * Right side is better.
- */
- else {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
- break;
- }
- /*
- * Fell off the left end.
- */
- if ((error = xfs_alloc_decrement(
- bno_cur_lt, 0, &i)))
- goto error0;
- if (!i) {
- xfs_btree_del_cursor(bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- break;
- }
- }
- }
- /*
- * The right side is perfect, trash the left side.
- */
- else {
- xfs_btree_del_cursor(bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
+ gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+ args->alignment, args->userdata, gtbnoa,
+ gtlena, &gtnew);
+
+ error = xfs_alloc_find_best_extent(args,
+ &bno_cur_gt, &bno_cur_lt,
+ gtdiff, &ltbno, &ltlen,
+ &ltbnoa, &ltlena,
+ 1 /* search left */);
}
+
+ if (error)
+ goto error0;
}
+
/*
* If we couldn't get anything, give up.
*/
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
- TRACE_ALLOC("neither", args);
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
+ if (!forced++) {
+ trace_xfs_alloc_near_busy(args);
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
+ goto restart;
+ }
+ trace_xfs_alloc_size_neither(args);
args->agbno = NULLAGBLOCK;
return 0;
}
+
/*
* At this point we have selected a freespace entry, either to the
* left or to the right. If it's on the right, copy all the
@@ -1219,35 +1194,41 @@ xfs_alloc_ag_vextent_near(
j = 1;
} else
j = 0;
+
/*
* Fix up the length and compute the useful address.
*/
- ltend = ltbno + ltlen;
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args)) {
- TRACE_ALLOC("nominleft", args);
+ trace_xfs_alloc_near_nominleft(args);
xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
return 0;
}
rlen = args->len;
- (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
- ltlen, &ltnew);
+ (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
+ args->userdata, ltbnoa, ltlena, &ltnew);
ASSERT(ltnew >= ltbno);
- ASSERT(ltnew + rlen <= ltend);
+ ASSERT(ltnew + rlen <= ltbnoa + ltlena);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
args->agbno = ltnew;
+
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
ltnew, rlen, XFSA_FIXUP_BNO_OK)))
goto error0;
- TRACE_ALLOC(j ? "gt" : "lt", args);
+
+ if (j)
+ trace_xfs_alloc_near_greater(args);
+ else
+ trace_xfs_alloc_near_lesser(args);
+
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
return 0;
error0:
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_near_error(args);
if (cnt_cur != NULL)
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
if (bno_cur_lt != NULL)
@@ -1272,56 +1253,95 @@ xfs_alloc_ag_vextent_size(
int error; /* error result */
xfs_agblock_t fbno; /* start of found freespace */
xfs_extlen_t flen; /* length of found freespace */
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_ag_vextent_size";
-#endif
int i; /* temp status variable */
xfs_agblock_t rbno; /* returned block number */
xfs_extlen_t rlen; /* length of returned extent */
+ int forced = 0;
+restart:
/*
* Allocate and initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
bno_cur = NULL;
+
/*
* Look for an entry >= maxlen+alignment-1 blocks.
*/
if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
args->maxlen + args->alignment - 1, &i)))
goto error0;
+
/*
- * If none, then pick up the last entry in the tree unless the
- * tree is empty.
+ * If none or we have busy extents that we cannot allocate from, then
+ * we have to settle for a smaller extent. In the case that there are
+ * no large extents, this will return the last entry in the tree unless
+ * the tree is empty. In the case that there are only busy large
+ * extents, this will return the largest small extent unless there
+ * are no smaller extents available.
*/
- if (!i) {
- if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
- &flen, &i)))
+ if (!i || forced > 1) {
+ error = xfs_alloc_ag_vextent_small(args, cnt_cur,
+ &fbno, &flen, &i);
+ if (error)
goto error0;
if (i == 0 || flen == 0) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("noentry", args);
+ trace_xfs_alloc_size_noentry(args);
return 0;
}
ASSERT(i == 1);
+ xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+ } else {
+ /*
+ * Search for a non-busy extent that is large enough.
+ * If we are at low space, don't check, or if we fall off
+ * the end of the btree, turn off the busy check and
+ * restart.
+ */
+ for (;;) {
+ error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen);
+
+ if (rlen >= args->maxlen)
+ break;
+
+ error = xfs_btree_increment(cnt_cur, 0, &i);
+ if (error)
+ goto error0;
+ if (i == 0) {
+ /*
+ * Our only valid extents must have been busy.
+ * Make it unbusy by forcing the log out and
+ * retrying. If we've been here before, forcing
+ * the log isn't making the extents available,
+ * which means they have probably been freed in
+ * this transaction. In that case, we have to
+ * give up on them and we'll attempt a minlen
+ * allocation the next time around.
+ */
+ xfs_btree_del_cursor(cnt_cur,
+ XFS_BTREE_NOERROR);
+ trace_xfs_alloc_size_busy(args);
+ if (!forced++)
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
+ goto restart;
+ }
+ }
}
- /*
- * There's a freespace as big as maxlen+alignment-1, get it.
- */
- else {
- if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- }
+
/*
* In the first case above, we got the last entry in the
* by-size btree. Now we check to see if the space hits maxlen
* once aligned; if not, we search left for something better.
* This can't happen in the second case above.
*/
- xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
- &rbno, &rlen);
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
(rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1336,7 +1356,7 @@ xfs_alloc_ag_vextent_size(
bestflen = flen;
bestfbno = fbno;
for (;;) {
- if ((error = xfs_alloc_decrement(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
goto error0;
if (i == 0)
break;
@@ -1346,8 +1366,8 @@ xfs_alloc_ag_vextent_size(
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (flen < bestrlen)
break;
- xfs_alloc_compute_aligned(fbno, flen, args->alignment,
- args->minlen, &rbno, &rlen);
+ xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen);
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
(rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1375,20 +1395,26 @@ xfs_alloc_ag_vextent_size(
* Fix up the length.
*/
args->len = rlen;
- xfs_alloc_fix_len(args);
- if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("nominleft", args);
- args->agbno = NULLAGBLOCK;
- return 0;
+ if (rlen < args->minlen) {
+ if (!forced++) {
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ trace_xfs_alloc_size_busy(args);
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
+ goto restart;
+ }
+ goto out_nominleft;
}
+ xfs_alloc_fix_len(args);
+
+ if (!xfs_alloc_fix_minleft(args))
+ goto out_nominleft;
rlen = args->len;
XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
/*
* Allocate and initialize a cursor for the by-block tree.
*/
- bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
rbno, rlen, XFSA_FIXUP_CNT_OK)))
goto error0;
@@ -1401,16 +1427,22 @@ xfs_alloc_ag_vextent_size(
args->agbno + args->len <=
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
error0);
- TRACE_ALLOC("normal", args);
+ trace_xfs_alloc_size_done(args);
return 0;
error0:
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_size_error(args);
if (cnt_cur)
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
return error;
+
+out_nominleft:
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ trace_xfs_alloc_size_nominleft(args);
+ args->agbno = NULLAGBLOCK;
+ return 0;
}
/*
@@ -1429,12 +1461,9 @@ xfs_alloc_ag_vextent_small(
int error;
xfs_agblock_t fbno;
xfs_extlen_t flen;
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_ag_vextent_small";
-#endif
int i;
- if ((error = xfs_alloc_decrement(ccur, 0, &i)))
+ if ((error = xfs_btree_decrement(ccur, 0, &i)))
goto error0;
if (i) {
if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
@@ -1449,9 +1478,13 @@ xfs_alloc_ag_vextent_small(
else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
(be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
> args->minleft)) {
- if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)))
+ error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+ if (error)
goto error0;
if (fbno != NULLAGBLOCK) {
+ xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+ args->userdata);
+
if (args->userdata) {
xfs_buf_t *bp;
@@ -1466,7 +1499,7 @@ xfs_alloc_ag_vextent_small(
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
error0);
args->wasfromfl = 1;
- TRACE_ALLOC("freelist", args);
+ trace_xfs_alloc_small_freelist(args);
*stat = 0;
return 0;
}
@@ -1479,24 +1512,26 @@ xfs_alloc_ag_vextent_small(
/*
* Can't allocate from the freelist for some reason.
*/
- else
+ else {
+ fbno = NULLAGBLOCK;
flen = 0;
+ }
/*
* Can't do the allocation, give up.
*/
if (flen < args->minlen) {
args->agbno = NULLAGBLOCK;
- TRACE_ALLOC("notenough", args);
+ trace_xfs_alloc_small_notenough(args);
flen = 0;
}
*fbnop = fbno;
*flenp = flen;
*stat = 1;
- TRACE_ALLOC("normal", args);
+ trace_xfs_alloc_small_done(args);
return 0;
error0:
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_small_error(args);
return error;
}
@@ -1515,9 +1550,6 @@ xfs_free_ag_extent(
xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
int error; /* error return value */
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_free_ag_extent";
-#endif
xfs_agblock_t gtbno; /* start of right neighbor block */
xfs_extlen_t gtlen; /* length of right neighbor block */
int haveleft; /* have a left neighbor block */
@@ -1528,13 +1560,13 @@ xfs_free_ag_extent(
xfs_mount_t *mp; /* mount point struct for filesystem */
xfs_agblock_t nbno; /* new starting block of freespace */
xfs_extlen_t nlen; /* new length of freespace */
+ xfs_perag_t *pag; /* per allocation group data */
mp = tp->t_mountp;
/*
* Allocate and initialize a cursor for the by-block btree.
*/
- bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL,
- 0);
+ bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
cnt_cur = NULL;
/*
* Look for a neighboring block on the left (lower block numbers)
@@ -1567,7 +1599,7 @@ xfs_free_ag_extent(
* Look for a neighboring block on the right (higher block numbers)
* that is contiguous with this space.
*/
- if ((error = xfs_alloc_increment(bno_cur, 0, &haveright)))
+ if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
goto error0;
if (haveright) {
/*
@@ -1593,8 +1625,7 @@ xfs_free_ag_extent(
/*
* Now allocate and initialize a cursor for the by-size tree.
*/
- cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL,
- 0);
+ cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
/*
* Have both left and right contiguous neighbors.
* Merge all three into a single free block.
@@ -1606,7 +1637,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
@@ -1615,19 +1646,19 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Delete the old by-block entry for the right block.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Move the by-block cursor back to the left neighbor.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
#ifdef DEBUG
@@ -1666,14 +1697,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Back up the by-block cursor to the left neighbor, and
* update its length.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
nbno = ltbno;
@@ -1692,7 +1723,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
@@ -1711,7 +1742,7 @@ xfs_free_ag_extent(
else {
nbno = bno;
nlen = len;
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
}
@@ -1723,55 +1754,32 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
cnt_cur = NULL;
+
/*
* Update the freespace totals in the ag and superblock.
*/
- {
- xfs_agf_t *agf;
- xfs_perag_t *pag; /* per allocation group data */
-
- agf = XFS_BUF_TO_AGF(agbp);
- pag = &mp->m_perag[agno];
- be32_add(&agf->agf_freeblks, len);
- xfs_trans_agblocks_delta(tp, len);
- pag->pagf_freeblks += len;
- XFS_WANT_CORRUPTED_GOTO(
- be32_to_cpu(agf->agf_freeblks) <=
- be32_to_cpu(agf->agf_length),
- error0);
- TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
- xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
- if (!isfl)
- xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
- XFS_STATS_INC(xs_freex);
- XFS_STATS_ADD(xs_freeb, len);
- }
- TRACE_FREE(haveleft ?
- (haveright ? "both" : "left") :
- (haveright ? "right" : "none"),
- agno, bno, len, isfl);
-
- /*
- * Since blocks move to the free list without the coordination
- * used in xfs_bmap_finish, we can't allow block to be available
- * for reallocation and non-transaction writing (user data)
- * until we know that the transaction that moved it to the free
- * list is permanently on disk. We track the blocks by declaring
- * these blocks as "busy"; the busy list is maintained on a per-ag
- * basis and each transaction records which entries should be removed
- * when the iclog commits to disk. If a busy block is allocated,
- * the iclog is pushed up to the LSN that freed the block.
- */
- xfs_alloc_mark_busy(tp, agno, bno, len);
+ pag = xfs_perag_get(mp, agno);
+ error = xfs_alloc_update_counters(tp, pag, agbp, len);
+ xfs_perag_put(pag);
+ if (error)
+ goto error0;
+
+ if (!isfl)
+ xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
+ XFS_STATS_INC(xs_freex);
+ XFS_STATS_ADD(xs_freeb, len);
+
+ trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
+
return 0;
error0:
- TRACE_FREE("error", agno, bno, len, isfl);
+ trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur)
@@ -1807,6 +1815,25 @@ xfs_alloc_compute_maxlevels(
}
/*
+ * Find the length of the longest extent in an AG.
+ */
+xfs_extlen_t
+xfs_alloc_longest_free_extent(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag)
+{
+ xfs_extlen_t need, delta = 0;
+
+ need = XFS_MIN_FREELIST_PAG(pag, mp);
+ if (need > pag->pagf_flcount)
+ delta = need - pag->pagf_flcount;
+
+ if (pag->pagf_longest > delta)
+ return pag->pagf_longest - delta;
+ return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
+}
+
+/*
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
@@ -1837,40 +1864,44 @@ xfs_alloc_fix_freelist(
&agbp)))
return error;
if (!pag->pagf_init) {
+ ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+ ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL;
return 0;
}
} else
agbp = NULL;
- /* If this is a metadata prefered pag and we are user data
+ /*
+ * If this is a metadata preferred pag and we are user data
* then try somewhere else if we are not being asked to
* try harder at this point
*/
- if (pag->pagf_metadata && args->userdata && flags) {
+ if (pag->pagf_metadata && args->userdata &&
+ (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
+ ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL;
return 0;
}
- need = XFS_MIN_FREELIST_PAG(pag, mp);
- delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
- /*
- * If it looks like there isn't a long enough extent, or enough
- * total blocks, reject it.
- */
- longest = (pag->pagf_longest > delta) ?
- (pag->pagf_longest - delta) :
- (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
- if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
- (args->minleft &&
- (int)(pag->pagf_freeblks + pag->pagf_flcount -
- need - args->total) <
- (int)args->minleft)) {
- if (agbp)
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
+ if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+ /*
+ * If it looks like there isn't a long enough extent, or enough
+ * total blocks, reject it.
+ */
+ need = XFS_MIN_FREELIST_PAG(pag, mp);
+ longest = xfs_alloc_longest_free_extent(mp, pag);
+ if ((args->minlen + args->alignment + args->minalignslop - 1) >
+ longest ||
+ ((int)(pag->pagf_freeblks + pag->pagf_flcount -
+ need - args->total) < (int)args->minleft)) {
+ if (agbp)
+ xfs_trans_brelse(tp, agbp);
+ args->agbp = NULL;
+ return 0;
+ }
}
+
/*
* Get the a.g. freespace buffer.
* Can fail if we're not blocking on locks, and it's held.
@@ -1880,6 +1911,8 @@ xfs_alloc_fix_freelist(
&agbp)))
return error;
if (agbp == NULL) {
+ ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+ ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL;
return 0;
}
@@ -1889,22 +1922,24 @@ xfs_alloc_fix_freelist(
*/
agf = XFS_BUF_TO_AGF(agbp);
need = XFS_MIN_FREELIST(agf, mp);
- delta = need > be32_to_cpu(agf->agf_flcount) ?
- (need - be32_to_cpu(agf->agf_flcount)) : 0;
/*
* If there isn't enough total or single-extent, reject it.
*/
- longest = be32_to_cpu(agf->agf_longest);
- longest = (longest > delta) ? (longest - delta) :
- (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
- if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
- (args->minleft &&
- (int)(be32_to_cpu(agf->agf_freeblks) +
- be32_to_cpu(agf->agf_flcount) - need - args->total) <
- (int)args->minleft)) {
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
+ if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+ delta = need > be32_to_cpu(agf->agf_flcount) ?
+ (need - be32_to_cpu(agf->agf_flcount)) : 0;
+ longest = be32_to_cpu(agf->agf_longest);
+ longest = (longest > delta) ? (longest - delta) :
+ (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
+ if ((args->minlen + args->alignment + args->minalignslop - 1) >
+ longest ||
+ ((int)(be32_to_cpu(agf->agf_freeblks) +
+ be32_to_cpu(agf->agf_flcount) - need - args->total) <
+ (int)args->minleft)) {
+ xfs_trans_brelse(tp, agbp);
+ args->agbp = NULL;
+ return 0;
+ }
}
/*
* Make the freelist shorter if it's too long.
@@ -1912,7 +1947,8 @@ xfs_alloc_fix_freelist(
while (be32_to_cpu(agf->agf_flcount) > need) {
xfs_buf_t *bp;
- if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
+ error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
+ if (error)
return error;
if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
return error;
@@ -1922,12 +1958,11 @@ xfs_alloc_fix_freelist(
/*
* Initialize the args structure.
*/
+ memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
targs.agbp = agbp;
targs.agno = args->agno;
- targs.mod = targs.minleft = targs.wasdel = targs.userdata =
- targs.minalignslop = 0;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag;
@@ -1942,24 +1977,33 @@ xfs_alloc_fix_freelist(
/*
* Allocate as many blocks as possible at once.
*/
- if ((error = xfs_alloc_ag_vextent(&targs)))
+ if ((error = xfs_alloc_ag_vextent(&targs))) {
+ xfs_trans_brelse(tp, agflbp);
return error;
+ }
/*
* Stop if we run out. Won't happen if callers are obeying
* the restrictions correctly. Can happen for free calls
* on a completely full ag.
*/
- if (targs.agbno == NULLAGBLOCK)
- break;
+ if (targs.agbno == NULLAGBLOCK) {
+ if (flags & XFS_ALLOC_FLAG_FREEING)
+ break;
+ xfs_trans_brelse(tp, agflbp);
+ args->agbp = NULL;
+ return 0;
+ }
/*
* Put each allocated block on the list.
*/
for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
- if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp,
- bno)))
+ error = xfs_alloc_put_freelist(tp, agbp,
+ agflbp, bno, 0);
+ if (error)
return error;
}
}
+ xfs_trans_brelse(tp, agflbp);
args->agbp = agbp;
return 0;
}
@@ -1972,23 +2016,22 @@ int /* error */
xfs_alloc_get_freelist(
xfs_trans_t *tp, /* transaction pointer */
xfs_buf_t *agbp, /* buffer containing the agf structure */
- xfs_agblock_t *bnop) /* block address retrieved from freelist */
+ xfs_agblock_t *bnop, /* block address retrieved from freelist */
+ int btreeblk) /* destination is an AGF btree */
{
xfs_agf_t *agf; /* a.g. freespace structure */
- xfs_agfl_t *agfl; /* a.g. freelist structure */
xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
xfs_agblock_t bno; /* block number returned */
+ __be32 *agfl_bno;
int error;
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_get_freelist";
-#endif
- xfs_mount_t *mp; /* mount structure */
+ int logflags;
+ xfs_mount_t *mp = tp->t_mountp;
xfs_perag_t *pag; /* per allocation group data */
- agf = XFS_BUF_TO_AGF(agbp);
/*
* Freelist is empty, give up.
*/
+ agf = XFS_BUF_TO_AGF(agbp);
if (!agf->agf_flcount) {
*bnop = NULLAGBLOCK;
return 0;
@@ -1996,36 +2039,38 @@ xfs_alloc_get_freelist(
/*
* Read the array of free blocks.
*/
- mp = tp->t_mountp;
- if ((error = xfs_alloc_read_agfl(mp, tp,
- be32_to_cpu(agf->agf_seqno), &agflbp)))
+ error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
+ &agflbp);
+ if (error)
return error;
- agfl = XFS_BUF_TO_AGFL(agflbp);
+
+
/*
* Get the block number and update the data structures.
*/
- bno = INT_GET(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)], ARCH_CONVERT);
- be32_add(&agf->agf_flfirst, 1);
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+ bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
+ be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
agf->agf_flfirst = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
- be32_add(&agf->agf_flcount, -1);
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+ be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
- TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
- xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+ xfs_perag_put(pag);
+
+ logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
+ if (btreeblk) {
+ be32_add_cpu(&agf->agf_btreeblks, 1);
+ pag->pagf_btreeblks++;
+ logflags |= XFS_AGF_BTREEBLKS;
+ }
+
+ xfs_alloc_log_agf(tp, agbp, logflags);
*bnop = bno;
- /*
- * As blocks are freed, they are added to the per-ag busy list
- * and remain there until the freeing transaction is committed to
- * disk. Now that we have allocated blocks, this list must be
- * searched to see if a block is being reused. If one is, then
- * the freeing transaction must be pushed to disk NOW by forcing
- * to disk all iclogs up that transaction's LSN.
- */
- xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1);
return 0;
}
@@ -2052,9 +2097,15 @@ xfs_alloc_log_agf(
offsetof(xfs_agf_t, agf_flcount),
offsetof(xfs_agf_t, agf_freeblks),
offsetof(xfs_agf_t, agf_longest),
+ offsetof(xfs_agf_t, agf_btreeblks),
+ offsetof(xfs_agf_t, agf_uuid),
sizeof(xfs_agf_t)
};
+ trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
+
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
+
xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
@@ -2087,17 +2138,17 @@ xfs_alloc_put_freelist(
xfs_trans_t *tp, /* transaction pointer */
xfs_buf_t *agbp, /* buffer for a.g. freelist header */
xfs_buf_t *agflbp,/* buffer for a.g. free block array */
- xfs_agblock_t bno) /* block being freed */
+ xfs_agblock_t bno, /* block being freed */
+ int btreeblk) /* block came from an AGF btree */
{
xfs_agf_t *agf; /* a.g. freespace structure */
- xfs_agfl_t *agfl; /* a.g. free block array */
- xfs_agblock_t *blockp;/* pointer to array entry */
+ __be32 *blockp;/* pointer to array entry */
int error;
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_put_freelist";
-#endif
+ int logflags;
xfs_mount_t *mp; /* mount structure */
xfs_perag_t *pag; /* per allocation group data */
+ __be32 *agfl_bno;
+ int startoff;
agf = XFS_BUF_TO_AGF(agbp);
mp = tp->t_mountp;
@@ -2105,92 +2156,198 @@ xfs_alloc_put_freelist(
if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
be32_to_cpu(agf->agf_seqno), &agflbp)))
return error;
- agfl = XFS_BUF_TO_AGFL(agflbp);
- be32_add(&agf->agf_fllast, 1);
+ be32_add_cpu(&agf->agf_fllast, 1);
if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
agf->agf_fllast = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
- be32_add(&agf->agf_flcount, 1);
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+ be32_add_cpu(&agf->agf_flcount, 1);
xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
+
+ logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
+ if (btreeblk) {
+ be32_add_cpu(&agf->agf_btreeblks, -1);
+ pag->pagf_btreeblks--;
+ logflags |= XFS_AGF_BTREEBLKS;
+ }
+ xfs_perag_put(pag);
+
+ xfs_alloc_log_agf(tp, agbp, logflags);
+
ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
- blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
- INT_SET(*blockp, ARCH_CONVERT, bno);
- TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
- xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
- xfs_trans_log_buf(tp, agflbp,
- (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
- (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
- sizeof(xfs_agblock_t) - 1));
+
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+ blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
+ *blockp = cpu_to_be32(bno);
+ startoff = (char *)blockp - (char *)agflbp->b_addr;
+
+ xfs_alloc_log_agf(tp, agbp, logflags);
+
+ xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
+ xfs_trans_log_buf(tp, agflbp, startoff,
+ startoff + sizeof(xfs_agblock_t) - 1);
return 0;
}
+/*
+ * Structural sanity check of an AGF buffer.
+ *
+ * Returns true only if every check below passes, false on the first one
+ * that fails.  The checksum itself is not examined here; only the contents
+ * of the AGF header are.
+ */
+static bool
+xfs_agf_verify(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(bp);
+
+	/* The UUID is only compared when the superblock has the CRC feature. */
+	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	    !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
+		return false;
+
+	/* Magic number and on-disk version must be recognised. */
+	if (agf->agf_magicnum != cpu_to_be32(XFS_AGF_MAGIC) ||
+	    !XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
+		return false;
+
+	/*
+	 * The free block count cannot exceed the AG length, and the free
+	 * list indices and count must fit within XFS_AGFL_SIZE().
+	 */
+	if (be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length) ||
+	    be32_to_cpu(agf->agf_flfirst) >= XFS_AGFL_SIZE(mp) ||
+	    be32_to_cpu(agf->agf_fllast) >= XFS_AGFL_SIZE(mp) ||
+	    be32_to_cpu(agf->agf_flcount) > XFS_AGFL_SIZE(mp))
+		return false;
+
+	/*
+	 * During growfs operations, the perag is not fully initialised,
+	 * so we can't use it for any useful checking. growfs ensures we can't
+	 * use it by using uncached buffers that don't have the perag attached
+	 * so we can detect and avoid this problem.
+	 */
+	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
+		return false;
+
+	/*
+	 * The btree block count is only checked when
+	 * xfs_sb_version_haslazysbcount() reports the feature as present.
+	 */
+	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
+		return false;
+
+	return true;
+}
+
+/*
+ * Read-side verifier for AGF buffers.
+ *
+ * A checksum failure (only tested when the superblock has the CRC feature)
+ * is recorded as EFSBADCRC.  Otherwise a failed xfs_agf_verify(), or a hit
+ * from the XFS_TEST_ERROR() hook, is recorded as EFSCORRUPTED.  Any error
+ * left on the buffer afterwards is passed to xfs_verifier_error().
+ */
+static void
+xfs_agf_read_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	bool			bad_crc;
+
+	bad_crc = xfs_sb_version_hascrc(&mp->m_sb) &&
+		  !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF);
+
+	if (bad_crc) {
+		xfs_buf_ioerror(bp, EFSBADCRC);
+	} else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
+				  XFS_ERRTAG_ALLOC_READ_AGF,
+				  XFS_RANDOM_ALLOC_READ_AGF)) {
+		xfs_buf_ioerror(bp, EFSCORRUPTED);
+	}
+
+	if (!bp->b_error)
+		return;
+
+	xfs_verifier_error(bp);
+}
+
+/*
+ * Write-side verifier for AGF buffers.
+ *
+ * A buffer that fails xfs_agf_verify() is marked EFSCORRUPTED and reported.
+ * Otherwise, when the superblock has the CRC feature, the LSN of the
+ * attached buffer log item (if there is one) is copied into agf_lsn and the
+ * checksum is recomputed.  Without the CRC feature nothing more is done.
+ */
+static void
+xfs_agf_write_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	struct xfs_buf_log_item	*log_item = bp->b_fspriv;
+	struct xfs_agf		*agf;
+
+	if (!xfs_agf_verify(mp, bp))
+		goto corrupt;
+
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		if (log_item) {
+			agf = XFS_BUF_TO_AGF(bp);
+			agf->agf_lsn = cpu_to_be64(log_item->bli_item.li_lsn);
+		}
+		xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
+	}
+	return;
+
+corrupt:
+	xfs_buf_ioerror(bp, EFSCORRUPTED);
+	xfs_verifier_error(bp);
+}
+
+const struct xfs_buf_ops xfs_agf_buf_ops = {	/* verifier callbacks for AGF buffers */
+ .verify_read = xfs_agf_read_verify,	/* checksum check, then structural check */
+ .verify_write = xfs_agf_write_verify,	/* structural check, then LSN + checksum update when CRCs are enabled */
+};
+
/*
* Read in the allocation group header (free/alloc section).
*/
int /* error */
-xfs_alloc_read_agf(
- xfs_mount_t *mp, /* mount point structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- int flags, /* XFS_ALLOC_FLAG_... */
- xfs_buf_t **bpp) /* buffer for the ag freelist header */
+xfs_read_agf(
+ struct xfs_mount *mp, /* mount point structure */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_agnumber_t agno, /* allocation group number */
+ int flags, /* XFS_BUF_ */
+ struct xfs_buf **bpp) /* buffer for the ag freelist header */
{
- xfs_agf_t *agf; /* ag freelist header */
- int agf_ok; /* set if agf is consistent */
- xfs_buf_t *bp; /* return value */
- xfs_perag_t *pag; /* per allocation group data */
int error;
+ trace_xfs_read_agf(mp, agno);
+
ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1),
- (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
- &bp);
+ XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
if (error)
return error;
- ASSERT(!bp || !XFS_BUF_GETERROR(bp));
- if (!bp) {
- *bpp = NULL;
+ if (!*bpp)
return 0;
- }
- /*
- * Validate the magic number of the agf block.
- */
- agf = XFS_BUF_TO_AGF(bp);
- agf_ok =
- be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
- XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
- be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
- be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
- be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
- be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
- if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
- XFS_RANDOM_ALLOC_READ_AGF))) {
- XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
- XFS_ERRLEVEL_LOW, mp, agf);
- xfs_trans_brelse(tp, bp);
- return XFS_ERROR(EFSCORRUPTED);
- }
- pag = &mp->m_perag[agno];
+
+ ASSERT(!(*bpp)->b_error);
+ xfs_buf_set_ref(*bpp, XFS_AGF_REF);
+ return 0;
+}
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int /* error */
+xfs_alloc_read_agf(
+ struct xfs_mount *mp, /* mount point structure */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_agnumber_t agno, /* allocation group number */
+ int flags, /* XFS_ALLOC_FLAG_... */
+ struct xfs_buf **bpp) /* buffer for the ag freelist header */
+{
+ struct xfs_agf *agf; /* ag freelist header */
+ struct xfs_perag *pag; /* per allocation group data */
+ int error;
+
+ trace_xfs_alloc_read_agf(mp, agno);
+
+ ASSERT(agno != NULLAGNUMBER);
+ error = xfs_read_agf(mp, tp, agno,
+ (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
+ bpp);
+ if (error)
+ return error;
+ if (!*bpp)
+ return 0;
+ ASSERT(!(*bpp)->b_error);
+
+ agf = XFS_BUF_TO_AGF(*bpp);
+ pag = xfs_perag_get(mp, agno);
if (!pag->pagf_init) {
pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+ pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
pag->pagf_longest = be32_to_cpu(agf->agf_longest);
pag->pagf_levels[XFS_BTNUM_BNOi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
pag->pagf_levels[XFS_BTNUM_CNTi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
- spinlock_init(&pag->pagb_lock, "xfspagb");
- pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
- sizeof(xfs_perag_busy_t), KM_SLEEP);
+ spin_lock_init(&pag->pagb_lock);
+ pag->pagb_count = 0;
+ pag->pagb_tree = RB_ROOT;
pag->pagf_init = 1;
}
#ifdef DEBUG
else if (!XFS_FORCED_SHUTDOWN(mp)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
+ ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
@@ -2199,8 +2356,7 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
}
#endif
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
- *bpp = bp;
+ xfs_perag_put(pag);
return 0;
}
@@ -2216,9 +2372,6 @@ xfs_alloc_vextent(
xfs_agblock_t agsize; /* allocation group size */
int error;
int flags; /* XFS_ALLOC_FLAG_... locking flags */
-#ifdef XFS_ALLOC_TRACE
- static char fname[] = "xfs_alloc_vextent";
-#endif
xfs_extlen_t minleft;/* minimum left value, temp copy */
xfs_mount_t *mp; /* mount structure pointer */
xfs_agnumber_t sagno; /* starting allocation group number */
@@ -2250,7 +2403,7 @@ xfs_alloc_vextent(
args->minlen > args->maxlen || args->minlen > agsize ||
args->mod >= args->prod) {
args->fsbno = NULLFSBLOCK;
- TRACE_ALLOC("badargs", args);
+ trace_xfs_alloc_vextent_badargs(args);
return 0;
}
minleft = args->minleft;
@@ -2263,24 +2416,21 @@ xfs_alloc_vextent(
* These three force us into a single a.g.
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- down_read(&mp->m_peraglock);
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
args->minleft = 0;
error = xfs_alloc_fix_freelist(args, 0);
args->minleft = minleft;
if (error) {
- TRACE_ALLOC("nofix", args);
+ trace_xfs_alloc_vextent_nofix(args);
goto error0;
}
if (!args->agbp) {
- up_read(&mp->m_peraglock);
- TRACE_ALLOC("noagbp", args);
+ trace_xfs_alloc_vextent_noagbp(args);
break;
}
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
if ((error = xfs_alloc_ag_vextent(args)))
goto error0;
- up_read(&mp->m_peraglock);
break;
case XFS_ALLOCTYPE_START_BNO:
/*
@@ -2332,14 +2482,13 @@ xfs_alloc_vextent(
* Loop over allocation groups twice; first time with
* trylock set, second time without.
*/
- down_read(&mp->m_peraglock);
for (;;) {
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
if (no_min) args->minleft = 0;
error = xfs_alloc_fix_freelist(args, flags);
args->minleft = minleft;
if (error) {
- TRACE_ALLOC("nofix", args);
+ trace_xfs_alloc_vextent_nofix(args);
goto error0;
}
/*
@@ -2350,15 +2499,28 @@ xfs_alloc_vextent(
goto error0;
break;
}
- TRACE_ALLOC("loopfailed", args);
+
+ trace_xfs_alloc_vextent_loopfailed(args);
+
/*
* Didn't work, figure out the next iteration.
*/
if (args->agno == sagno &&
type == XFS_ALLOCTYPE_START_BNO)
args->type = XFS_ALLOCTYPE_THIS_AG;
- if (++(args->agno) == mp->m_sb.sb_agcount)
- args->agno = 0;
+ /*
+ * For the first allocation, we can try any AG to get
+ * space. However, if we already have allocated a
+ * block, we don't want to try AGs whose number is below
+ * sagno. Otherwise, we may end up with out-of-order
+ * locking of AGF, which might cause deadlock.
+ */
+ if (++(args->agno) == mp->m_sb.sb_agcount) {
+ if (args->firstblock != NULLFSBLOCK)
+ args->agno = sagno;
+ else
+ args->agno = 0;
+ }
/*
* Reached the starting a.g., must either be done
* or switch to non-trylock mode.
@@ -2366,7 +2528,7 @@ xfs_alloc_vextent(
if (args->agno == sagno) {
if (no_min == 1) {
args->agbno = NULLAGBLOCK;
- TRACE_ALLOC("allfailed", args);
+ trace_xfs_alloc_vextent_allfailed(args);
break;
}
if (flags == 0) {
@@ -2380,8 +2542,8 @@ xfs_alloc_vextent(
}
}
}
+ xfs_perag_put(args->pag);
}
- up_read(&mp->m_peraglock);
if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
if (args->agno == sagno)
mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2407,9 +2569,10 @@ xfs_alloc_vextent(
args->len);
#endif
}
+ xfs_perag_put(args->pag);
return 0;
error0:
- up_read(&mp->m_peraglock);
+ xfs_perag_put(args->pag);
return error;
}
@@ -2424,178 +2587,44 @@ xfs_free_extent(
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len) /* length of extent */
{
-#ifdef DEBUG
- xfs_agf_t *agf; /* a.g. freespace header */
-#endif
- xfs_alloc_arg_t args; /* allocation argument structure */
+ xfs_alloc_arg_t args;
int error;
ASSERT(len != 0);
+ memset(&args, 0, sizeof(xfs_alloc_arg_t));
args.tp = tp;
args.mp = tp->t_mountp;
- args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
- ASSERT(args.agno < args.mp->m_sb.sb_agcount);
- args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
- args.alignment = 1;
- args.minlen = args.minleft = args.minalignslop = 0;
- down_read(&args.mp->m_peraglock);
- args.pag = &args.mp->m_perag[args.agno];
- if ((error = xfs_alloc_fix_freelist(&args, 0)))
- goto error0;
-#ifdef DEBUG
- ASSERT(args.agbp != NULL);
- agf = XFS_BUF_TO_AGF(args.agbp);
- ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length));
-#endif
- error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
- len, 0);
-error0:
- up_read(&args.mp->m_peraglock);
- return error;
-}
-
-
-/*
- * AG Busy list management
- * The busy list contains block ranges that have been freed but whose
- * transacations have not yet hit disk. If any block listed in a busy
- * list is reused, the transaction that freed it must be forced to disk
- * before continuing to use the block.
- *
- * xfs_alloc_mark_busy - add to the per-ag busy list
- * xfs_alloc_clear_busy - remove an item from the per-ag busy list
- */
-void
-xfs_alloc_mark_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len)
-{
- xfs_mount_t *mp;
- xfs_perag_busy_t *bsy;
- int n;
- SPLDECL(s);
-
- mp = tp->t_mountp;
- s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
-
- /* search pagb_list for an open slot */
- for (bsy = mp->m_perag[agno].pagb_list, n = 0;
- n < XFS_PAGB_NUM_SLOTS;
- bsy++, n++) {
- if (bsy->busy_tp == NULL) {
- break;
- }
- }
-
- if (n < XFS_PAGB_NUM_SLOTS) {
- bsy = &mp->m_perag[agno].pagb_list[n];
- mp->m_perag[agno].pagb_count++;
- TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
- bsy->busy_start = bno;
- bsy->busy_length = len;
- bsy->busy_tp = tp;
- xfs_trans_add_busy(tp, agno, n);
- } else {
- TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
- /*
- * The busy list is full! Since it is now not possible to
- * track the free block, make this a synchronous transaction
- * to insure that the block is not reused before this
- * transaction commits.
- */
- xfs_trans_set_sync(tp);
- }
-
- mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
-}
-
-void
-xfs_alloc_clear_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- int idx)
-{
- xfs_mount_t *mp;
- xfs_perag_busy_t *list;
- SPLDECL(s);
-
- mp = tp->t_mountp;
-
- s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
- list = mp->m_perag[agno].pagb_list;
- ASSERT(idx < XFS_PAGB_NUM_SLOTS);
- if (list[idx].busy_tp == tp) {
- TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
- list[idx].busy_tp = NULL;
- mp->m_perag[agno].pagb_count--;
- } else {
- TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
- }
-
- mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
-}
-
-
-/*
- * returns non-zero if any of (agno,bno):len is in a busy list
- */
-STATIC int
-xfs_alloc_search_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len)
-{
- xfs_mount_t *mp;
- xfs_perag_busy_t *bsy;
- int n;
- xfs_agblock_t uend, bend;
- xfs_lsn_t lsn;
- int cnt;
- SPLDECL(s);
-
- mp = tp->t_mountp;
-
- s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
- cnt = mp->m_perag[agno].pagb_count;
+ /*
+ * validate that the block number is legal - this enables us to detect
+ * and handle a silent filesystem corruption rather than crashing.
+ */
+ args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
+ if (args.agno >= args.mp->m_sb.sb_agcount)
+ return EFSCORRUPTED;
- uend = bno + len - 1;
+ args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+ if (args.agbno >= args.mp->m_sb.sb_agblocks)
+ return EFSCORRUPTED;
- /* search pagb_list for this slot, skipping open slots */
- for (bsy = mp->m_perag[agno].pagb_list, n = 0;
- cnt; bsy++, n++) {
+ args.pag = xfs_perag_get(args.mp, args.agno);
+ ASSERT(args.pag);
- /*
- * (start1,length1) within (start2, length2)
- */
- if (bsy->busy_tp != NULL) {
- bend = bsy->busy_start + bsy->busy_length - 1;
- if ((bno > bend) ||
- (uend < bsy->busy_start)) {
- cnt--;
- } else {
- TRACE_BUSYSEARCH("xfs_alloc_search_busy",
- "found1", agno, bno, len, n,
- tp);
- break;
- }
- }
- }
+ error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+ if (error)
+ goto error0;
- /*
- * If a block was found, force the log through the LSN of the
- * transaction that freed the block
- */
- if (cnt) {
- TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
- lsn = bsy->busy_tp->t_commit_lsn;
- mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
- xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
- } else {
- TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp);
- n = -1;
- mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
+ /* validate the extent size is legal now we have the agf locked */
+ if (args.agbno + len >
+ be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
+ error = EFSCORRUPTED;
+ goto error0;
}
- return n;
+ error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
+ if (!error)
+ xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
+error0:
+ xfs_perag_put(args.pag);
+ return error;
}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 3546dea27b7..feacb061bab 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -19,28 +19,77 @@
#define __XFS_ALLOC_H__
struct xfs_buf;
+struct xfs_btree_cur;
struct xfs_mount;
struct xfs_perag;
struct xfs_trans;
+extern struct workqueue_struct *xfs_alloc_wq;
+
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/
-typedef enum xfs_alloctype
-{
- XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */
- XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */
- XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */
- XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */
- XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */
- XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */
- XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */
-} xfs_alloctype_t;
+#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */
+#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
+#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */
+#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
+#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
+#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
+#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */
+
+/*
+ * This should become an enum again when the tracing code is fixed.
+ *
+ * Until then the allocation types are distinct single-bit #define values
+ * carried in an unsigned int, and XFS_ALLOC_TYPES below pairs each value
+ * with a printable name (presumably for symbolic output in the tracepoints
+ * -- confirm against the tracing code).
+ */
+typedef unsigned int xfs_alloctype_t;
+
+#define XFS_ALLOC_TYPES \
+ { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
+ { XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \
+ { XFS_ALLOCTYPE_START_AG, "START_AG" }, \
+ { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \
+ { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \
+ { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \
+ { XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" }
/*
* Flags for xfs_alloc_fix_freelist.
*/
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
+#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
+
+/*
+ * In order to avoid ENOSPC-related deadlock caused by
+ * out-of-order locking of AGF buffer (PV 947395), we place
+ * constraints on the relationship among actual allocations for
+ * data blocks, freelist blocks, and potential file data bmap
+ * btree blocks. However, these restrictions may result in no
+ * actual space allocated for a delayed extent, for example, a data
+ * block in a certain AG is allocated but there is no additional
+ * block for the additional bmap btree block due to a split of the
+ * bmap btree of the file. The result of this may lead to an
+ * infinite loop in xfssyncd when the file gets flushed to disk and
+ * all delayed extents need to be actually allocated. To get around
+ * this, we explicitly set aside a few blocks which will not be
+ * reserved in delayed allocation. Considering the minimum number of
+ * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
+ * btree requires 1 fsb, so we set the number of set-aside blocks
+ * to 4 + 4*agcount.
+ */
+#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
+
+/*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size of the AG. However, we cannot use all
+ * the blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ * - the AG superblock, AGF, AGI and AGFL
+ * - the AGF (bno and cnt) and AGI btree root blocks
+ * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ *
+ * In the macro below, the 4 passed to XFS_FSS_TO_BB() corresponds to the
+ * four sector-sized AG headers listed above, and the constant 7 is the
+ * three btree root blocks plus the four AGFL blocks.
+ */
+#define XFS_ALLOC_AG_MAX_USABLE(mp) \
+ ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+
/*
* Argument structure for xfs_alloc routines.
@@ -68,8 +117,9 @@ typedef struct xfs_alloc_arg {
xfs_alloctype_t otype; /* original allocation type */
char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */
- char isfl; /* set if is freelist blocks - !actg */
+ char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
+ xfs_fsblock_t firstblock; /* io first block allocated */
} xfs_alloc_arg_t;
/*
@@ -78,26 +128,12 @@ typedef struct xfs_alloc_arg {
#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
-
-#ifdef __KERNEL__
-
-#if defined(XFS_ALLOC_TRACE)
/*
- * Allocation tracing buffer size.
+ * Find the length of the longest extent in an AG.
*/
-#define XFS_ALLOC_TRACE_SIZE 4096
-extern ktrace_t *xfs_alloc_trace_buf;
-
-/*
- * Types for alloc tracing.
- */
-#define XFS_ALLOC_KTRACE_ALLOC 1
-#define XFS_ALLOC_KTRACE_FREE 2
-#define XFS_ALLOC_KTRACE_MODAGF 3
-#define XFS_ALLOC_KTRACE_BUSY 4
-#define XFS_ALLOC_KTRACE_UNBUSY 5
-#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
-#endif
+xfs_extlen_t
+xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+ struct xfs_perag *pag);
/*
* Compute and fill in value of m_ag_maxlevels.
@@ -114,7 +150,8 @@ int /* error */
xfs_alloc_get_freelist(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *agbp, /* buffer containing the agf structure */
- xfs_agblock_t *bnop); /* block address retrieved from freelist */
+ xfs_agblock_t *bnop, /* block address retrieved from freelist */
+ int btreeblk); /* destination is a AGF btree */
/*
* Log the given fields from the agf structure.
@@ -143,7 +180,8 @@ xfs_alloc_put_freelist(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *agbp, /* buffer for a.g. freelist header */
struct xfs_buf *agflbp,/* buffer for a.g. free block array */
- xfs_agblock_t bno); /* block being freed */
+ xfs_agblock_t bno, /* block being freed */
+ int btreeblk); /* owner was a AGF btree */
/*
* Read in the allocation group header (free/alloc section).
@@ -172,18 +210,25 @@ xfs_free_extent(
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len); /* length of extent */
-void
-xfs_alloc_mark_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len);
+int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat); /* success/failure */
-void
-xfs_alloc_clear_busy(xfs_trans_t *tp,
- xfs_agnumber_t ag,
- int idx);
-
-
-#endif /* __KERNEL__ */
+int /* error */
+xfs_alloc_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat); /* success/failure */
+
+int /* error */
+xfs_alloc_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t *bno, /* output: starting block of extent */
+ xfs_extlen_t *len, /* output: length of extent */
+ int *stat); /* output: success/failure */
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index a1d92da86cc..8358f1ded94 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -17,2187 +17,488 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_ialloc.h"
+#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
-/*
- * Prototypes for internal functions.
- */
-
-STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int);
-STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *);
-STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
- xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
-STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
-/*
- * Internal functions.
- */
+STATIC struct xfs_btree_cur *
+xfs_allocbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno,
+ cur->bc_btnum);
+}
-/*
- * Single level of the xfs_alloc_delete record deletion routine.
- * Delete record pointed to by cur/level.
- * Remove the record from its block then rebalance the tree.
- * Return 0 for error, 1 for done, 2 to go on to the next level.
- */
-STATIC int /* error */
-xfs_alloc_delrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level removing record from */
- int *stat) /* fail/done/go-on */
+STATIC void
+xfs_allocbt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int inc)
{
- xfs_agf_t *agf; /* allocation group freelist header */
- xfs_alloc_block_t *block; /* btree block record/key lives in */
- xfs_agblock_t bno; /* btree block number */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* kp points here if block is level 0 */
- xfs_agblock_t lbno; /* left block's block number */
- xfs_buf_t *lbp; /* left block's buffer pointer */
- xfs_alloc_block_t *left; /* left btree block */
- xfs_alloc_key_t *lkp=NULL; /* left block key pointer */
- xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */
- int lrecs=0; /* number of records in left block */
- xfs_alloc_rec_t *lrp; /* left block record pointer */
- xfs_mount_t *mp; /* mount structure */
- int ptr; /* index in btree block for this rec */
- xfs_agblock_t rbno; /* right block's block number */
- xfs_buf_t *rbp; /* right block's buffer pointer */
- xfs_alloc_block_t *right; /* right btree block */
- xfs_alloc_key_t *rkp; /* right block key pointer */
- xfs_alloc_ptr_t *rpp; /* right block address pointer */
- int rrecs=0; /* number of records in right block */
- xfs_alloc_rec_t *rrp; /* right block record pointer */
- xfs_btree_cur_t *tcur; /* temporary btree cursor */
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ int btnum = cur->bc_btnum;
+ struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
- /*
- * Get the index of the entry being deleted, check for nothing there.
- */
- ptr = cur->bc_ptrs[level];
- if (ptr == 0) {
- *stat = 0;
- return 0;
- }
- /*
- * Get the buffer & block containing the record or key/ptr.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- /*
- * Fail if we're off the end of the block.
- */
- if (ptr > be16_to_cpu(block->bb_numrecs)) {
- *stat = 0;
- return 0;
- }
- XFS_STATS_INC(xs_abt_delrec);
- /*
- * It's a nonleaf. Excise the key and ptr being deleted, by
- * sliding the entries past them down one.
- * Log the changed areas of the block.
- */
- if (level > 0) {
- lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = ptr; i < be16_to_cpu(block->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
- return error;
- }
-#endif
- if (ptr < be16_to_cpu(block->bb_numrecs)) {
- memmove(&lkp[ptr - 1], &lkp[ptr],
- (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lkp));
- memmove(&lpp[ptr - 1], &lpp[ptr],
- (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lpp));
- xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
- xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
- }
- }
- /*
- * It's a leaf. Excise the record being deleted, by sliding the
- * entries past it down one. Log the changed areas of the block.
- */
- else {
- lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
- if (ptr < be16_to_cpu(block->bb_numrecs)) {
- memmove(&lrp[ptr - 1], &lrp[ptr],
- (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lrp));
- xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
- }
- /*
- * If it's the first record in the block, we'll need a key
- * structure to pass up to the next level (updkey).
- */
- if (ptr == 1) {
- key.ar_startblock = lrp->ar_startblock;
- key.ar_blockcount = lrp->ar_blockcount;
- lkp = &key;
- }
- }
- /*
- * Decrement and log the number of entries in the block.
- */
- be16_add(&block->bb_numrecs, -1);
- xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
- /*
- * See if the longest free extent in the allocation group was
- * changed by this operation. True if it's the by-size btree, and
- * this is the leaf level, and there is no right sibling block,
- * and this was the last record.
- */
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- mp = cur->bc_mp;
-
- if (level == 0 &&
- cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- ptr > be16_to_cpu(block->bb_numrecs)) {
- ASSERT(ptr == be16_to_cpu(block->bb_numrecs) + 1);
- /*
- * There are still records in the block. Grab the size
- * from the last one.
- */
- if (be16_to_cpu(block->bb_numrecs)) {
- rrp = XFS_ALLOC_REC_ADDR(block, be16_to_cpu(block->bb_numrecs), cur);
- agf->agf_longest = rrp->ar_blockcount;
- }
- /*
- * No free extents left.
- */
- else
- agf->agf_longest = 0;
- mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
- be32_to_cpu(agf->agf_longest);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
- }
- /*
- * Is this the root level? If so, we're almost done.
- */
- if (level == cur->bc_nlevels - 1) {
- /*
- * If this is the root level,
- * and there's only one entry left,
- * and it's NOT the leaf level,
- * then we can get rid of this level.
- */
- if (be16_to_cpu(block->bb_numrecs) == 1 && level > 0) {
- /*
- * lpp is still set to the first pointer in the block.
- * Make it the new root of the btree.
- */
- bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
- agf->agf_roots[cur->bc_btnum] = *lpp;
- be32_add(&agf->agf_levels[cur->bc_btnum], -1);
- mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
- /*
- * Put this buffer/block on the ag's freelist.
- */
- if ((error = xfs_alloc_put_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, NULL, bno)))
- return error;
- /*
- * Since blocks move to the free list without the
- * coordination used in xfs_bmap_finish, we can't allow
- * block to be available for reallocation and
- * non-transaction writing (user data) until we know
- * that the transaction that moved it to the free list
- * is permanently on disk. We track the blocks by
- * declaring these blocks as "busy"; the busy list is
- * maintained on a per-ag basis and each transaction
- * records which entries should be removed when the
- * iclog commits to disk. If a busy block is
- * allocated, the iclog is pushed up to the LSN
- * that freed the block.
- */
- xfs_alloc_mark_busy(cur->bc_tp,
- be32_to_cpu(agf->agf_seqno), bno, 1);
-
- xfs_trans_agbtree_delta(cur->bc_tp, -1);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_ROOTS | XFS_AGF_LEVELS);
- /*
- * Update the cursor so there's one fewer level.
- */
- xfs_btree_setbuf(cur, level, NULL);
- cur->bc_nlevels--;
- } else if (level > 0 &&
- (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * If we deleted the leftmost entry in the block, update the
- * key values above us in the tree.
- */
- if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
- return error;
- /*
- * If the number of records remaining in the block is at least
- * the minimum, we're done.
- */
- if (be16_to_cpu(block->bb_numrecs) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * Otherwise, we have to move some records around to keep the
- * tree balanced. Look at the left and right sibling blocks to
- * see if we can re-balance by moving only one record.
- */
- rbno = be32_to_cpu(block->bb_rightsib);
- lbno = be32_to_cpu(block->bb_leftsib);
- bno = NULLAGBLOCK;
- ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
- /*
- * Duplicate the cursor so our btree manipulations here won't
- * disrupt the next level up.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- /*
- * If there's a right sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (rbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the last entry in the next block.
- * Actually any entry but the first would suffice.
- */
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_increment(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- /*
- * Grab a pointer to the block.
- */
- rbp = tcur->bc_bufs[level];
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(right->bb_leftsib);
- /*
- * If right block is full enough so that removing one entry
- * won't make it too empty, and left-shifting an entry out
- * of right to us works, we're done.
- */
- if (be16_to_cpu(right->bb_numrecs) - 1 >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_alloc_lshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level > 0 &&
- (error = xfs_alloc_decrement(cur, level,
- &i)))
- return error;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference, and fix up the temp cursor to point
- * to our block again (last record).
- */
- rrecs = be16_to_cpu(right->bb_numrecs);
- if (lbno != NULLAGBLOCK) {
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_decrement(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- }
- }
- /*
- * If there's a left sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (lbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the first entry in the
- * previous block.
- */
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_decrement(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_btree_firstrec(tcur, level);
- /*
- * Grab a pointer to the block.
- */
- lbp = tcur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(left->bb_rightsib);
- /*
- * If left block is full enough so that removing one entry
- * won't make it too empty, and right-shifting an entry out
- * of left to us works, we're done.
- */
- if (be16_to_cpu(left->bb_numrecs) - 1 >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_alloc_rshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level == 0)
- cur->bc_ptrs[0]++;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference.
- */
- lrecs = be16_to_cpu(left->bb_numrecs);
- }
- /*
- * Delete the temp cursor, we're done with it.
- */
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- /*
- * If here, we need to do a join to keep the tree balanced.
- */
- ASSERT(bno != NULLAGBLOCK);
- /*
- * See if we can join with the left neighbor block.
- */
- if (lbno != NULLAGBLOCK &&
- lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "right" to be the starting block,
- * "left" to be the left neighbor.
- */
- rbno = bno;
- right = block;
- rbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, lbno, 0, &lbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- }
- /*
- * If that won't work, see if we can join with the right neighbor block.
- */
- else if (rbno != NULLAGBLOCK &&
- rrecs + be16_to_cpu(block->bb_numrecs) <=
- XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "left" to be the starting block,
- * "right" to be the right neighbor.
- */
- lbno = bno;
- left = block;
- lbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, rbno, 0, &rbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- }
- /*
- * Otherwise, we can't fix the imbalance.
- * Just return. This is probably a logic error, but it's not fatal.
- */
- else {
- if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * We're now going to join "left" and "right" by moving all the stuff
- * in "right" to "left" and deleting "right".
- */
- if (level > 0) {
- /*
- * It's a non-leaf. Move keys and pointers.
- */
- lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memcpy(lkp, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*lkp));
- memcpy(lpp, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*lpp));
- xfs_alloc_log_keys(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
- be16_to_cpu(left->bb_numrecs) +
- be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
- be16_to_cpu(left->bb_numrecs) +
- be16_to_cpu(right->bb_numrecs));
- } else {
- /*
- * It's a leaf. Move records.
- */
- lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memcpy(lrp, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*lrp));
- xfs_alloc_log_recs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
- be16_to_cpu(left->bb_numrecs) +
- be16_to_cpu(right->bb_numrecs));
- }
- /*
- * If we joined with the left neighbor, set the buffer in the
- * cursor to the left block, and fix up the index.
- */
- if (bp != lbp) {
- xfs_btree_setbuf(cur, level, lbp);
- cur->bc_ptrs[level] += be16_to_cpu(left->bb_numrecs);
- }
- /*
- * If we joined with the right neighbor and there's a level above
- * us, increment the cursor at that level.
- */
- else if (level + 1 < cur->bc_nlevels &&
- (error = xfs_alloc_increment(cur, level + 1, &i)))
- return error;
- /*
- * Fix up the number of records in the surviving block.
- */
- be16_add(&left->bb_numrecs, be16_to_cpu(right->bb_numrecs));
- /*
- * Fix up the right block pointer in the surviving block, and log it.
- */
- left->bb_rightsib = right->bb_rightsib;
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there is a right sibling now, make it point to the
- * remaining block.
- */
- if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
- xfs_alloc_block_t *rrblock;
- xfs_buf_t *rrbp;
-
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
- &rrbp, XFS_ALLOC_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(lbno);
- xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * Free the deleting block by putting it on the freelist.
- */
- if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
- NULL, rbno)))
- return error;
- /*
- * Since blocks move to the free list without the coordination
- * used in xfs_bmap_finish, we can't allow block to be available
- * for reallocation and non-transaction writing (user data)
- * until we know that the transaction that moved it to the free
- * list is permanently on disk. We track the blocks by declaring
- * these blocks as "busy"; the busy list is maintained on a
- * per-ag basis and each transaction records which entries
- * should be removed when the iclog commits to disk. If a
- * busy block is allocated, the iclog is pushed up to the
- * LSN that freed the block.
- */
- xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
- xfs_trans_agbtree_delta(cur->bc_tp, -1);
+ ASSERT(ptr->s != 0);
- /*
- * Adjust the current level's cursor so that we're left referring
- * to the right node, after we're done.
- * If this leaves the ptr value 0 our caller will fix it up.
- */
- if (level > 0)
- cur->bc_ptrs[level]--;
- /*
- * Return value means the next level up has something to do.
- */
- *stat = 2;
- return 0;
+ agf->agf_roots[btnum] = ptr->s;
+ be32_add_cpu(&agf->agf_levels[btnum], inc);
+ pag->pagf_levels[btnum] += inc;
+ xfs_perag_put(pag);
-error0:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
-/*
- * Insert one record/level. Return information to the caller
- * allowing the next level up to proceed if necessary.
- */
-STATIC int /* error */
-xfs_alloc_insrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to insert record at */
- xfs_agblock_t *bnop, /* i/o: block number inserted */
- xfs_alloc_rec_t *recp, /* i/o: record data inserted */
- xfs_btree_cur_t **curp, /* output: new cursor replacing cur */
- int *stat) /* output: success/failure */
+STATIC int
+xfs_allocbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
{
- xfs_agf_t *agf; /* allocation group freelist header */
- xfs_alloc_block_t *block; /* btree block record/key lives in */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* key value being inserted */
- xfs_alloc_key_t *kp; /* pointer to btree keys */
- xfs_agblock_t nbno; /* block number of allocated block */
- xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
- xfs_alloc_key_t nkey; /* new key value, from split */
- xfs_alloc_rec_t nrec; /* new record value, for caller */
- int optr; /* old ptr value */
- xfs_alloc_ptr_t *pp; /* pointer to btree addresses */
- int ptr; /* index in btree block for this rec */
- xfs_alloc_rec_t *rp; /* pointer to btree records */
-
- ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
+ int error;
+ xfs_agblock_t bno;
- /*
- * GCC doesn't understand the (arguably complex) control flow in
- * this function and complains about uninitialized structure fields
- * without this.
- */
- memset(&nrec, 0, sizeof(nrec));
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- /*
- * If we made it to the root level, allocate a new root block
- * and we're done.
- */
- if (level >= cur->bc_nlevels) {
- XFS_STATS_INC(xs_abt_insrec);
- if ((error = xfs_alloc_newroot(cur, &i)))
- return error;
- *bnop = NULLAGBLOCK;
- *stat = i;
- return 0;
+ /* Allocate the new block from the freelist. If we can't, give up. */
+ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+ &bno, 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
}
- /*
- * Make a key out of the record data to be inserted, and save it.
- */
- key.ar_startblock = recp->ar_startblock;
- key.ar_blockcount = recp->ar_blockcount;
- optr = ptr = cur->bc_ptrs[level];
- /*
- * If we're off the left edge, return failure.
- */
- if (ptr == 0) {
+
+ if (bno == NULLAGBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
- XFS_STATS_INC(xs_abt_insrec);
- /*
- * Get pointers to the btree buffer and block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
- /*
- * Check that the new entry is being inserted in the right place.
- */
- if (ptr <= be16_to_cpu(block->bb_numrecs)) {
- if (level == 0) {
- rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- xfs_btree_check_rec(cur->bc_btnum, recp, rp);
- } else {
- kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
- xfs_btree_check_key(cur->bc_btnum, &key, kp);
- }
- }
-#endif
- nbno = NULLAGBLOCK;
- ncur = (xfs_btree_cur_t *)0;
- /*
- * If the block is full, we can't insert the new entry until we
- * make the block un-full.
- */
- if (be16_to_cpu(block->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * First, try shifting an entry to the right neighbor.
- */
- if ((error = xfs_alloc_rshift(cur, level, &i)))
- return error;
- if (i) {
- /* nothing */
- }
- /*
- * Next, try shifting an entry to the left neighbor.
- */
- else {
- if ((error = xfs_alloc_lshift(cur, level, &i)))
- return error;
- if (i)
- optr = ptr = cur->bc_ptrs[level];
- else {
- /*
- * Next, try splitting the current block in
- * half. If this works we have to re-set our
- * variables because we could be in a
- * different block now.
- */
- if ((error = xfs_alloc_split(cur, level, &nbno,
- &nkey, &ncur, &i)))
- return error;
- if (i) {
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error =
- xfs_btree_check_sblock(cur,
- block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- nrec.ar_startblock = nkey.ar_startblock;
- nrec.ar_blockcount = nkey.ar_blockcount;
- }
- /*
- * Otherwise the insert fails.
- */
- else {
- *stat = 0;
- return 0;
- }
- }
- }
- }
- /*
- * At this point we know there's room for our new entry in the block
- * we're pointing at.
- */
- if (level > 0) {
- /*
- * It's a non-leaf entry. Make a hole for the new data
- * in the key and ptr regions of the block.
- */
- kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = be16_to_cpu(block->bb_numrecs); i >= ptr; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
- return error;
- }
-#endif
- memmove(&kp[ptr], &kp[ptr - 1],
- (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*kp));
- memmove(&pp[ptr], &pp[ptr - 1],
- (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*pp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
- return error;
-#endif
- /*
- * Now stuff the new data in, bump numrecs and log the new data.
- */
- kp[ptr - 1] = key;
- pp[ptr - 1] = cpu_to_be32(*bnop);
- be16_add(&block->bb_numrecs, 1);
- xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
- xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
-#ifdef DEBUG
- if (ptr < be16_to_cpu(block->bb_numrecs))
- xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
- kp + ptr);
-#endif
- } else {
- /*
- * It's a leaf entry. Make a hole for the new record.
- */
- rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
- memmove(&rp[ptr], &rp[ptr - 1],
- (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*rp));
- /*
- * Now stuff the new record in, bump numrecs
- * and log the new data.
- */
- rp[ptr - 1] = *recp; /* INT_: struct copy */
- be16_add(&block->bb_numrecs, 1);
- xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
-#ifdef DEBUG
- if (ptr < be16_to_cpu(block->bb_numrecs))
- xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
- rp + ptr);
-#endif
- }
- /*
- * Log the new number of records in the btree header.
- */
- xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
- /*
- * If we inserted at the start of a block, update the parents' keys.
- */
- if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
- return error;
- /*
- * Look to see if the longest extent in the allocation group
- * needs to be updated.
- */
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- if (level == 0 &&
- cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
- /*
- * If this is a leaf in the by-size btree and there
- * is no right sibling block and this block is bigger
- * than the previous longest block, update it.
- */
- agf->agf_longest = recp->ar_blockcount;
- cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
- = be32_to_cpu(recp->ar_blockcount);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
- }
- /*
- * Return the new block number, if any.
- * If there is one, give back a record value and a cursor too.
- */
- *bnop = nbno;
- if (nbno != NULLAGBLOCK) {
- *recp = nrec; /* INT_: struct copy */
- *curp = ncur; /* INT_: struct copy */
- }
+ xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+
+ xfs_trans_agbtree_delta(cur->bc_tp, 1);
+ new->s = cpu_to_be32(bno);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
-/*
- * Log header fields from a btree block.
- */
-STATIC void
-xfs_alloc_log_block(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *bp, /* buffer containing btree block */
- int fields) /* mask of fields: XFS_BB_... */
+STATIC int
+xfs_allocbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
{
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- static const short offsets[] = { /* table of offsets */
- offsetof(xfs_alloc_block_t, bb_magic),
- offsetof(xfs_alloc_block_t, bb_level),
- offsetof(xfs_alloc_block_t, bb_numrecs),
- offsetof(xfs_alloc_block_t, bb_leftsib),
- offsetof(xfs_alloc_block_t, bb_rightsib),
- sizeof(xfs_alloc_block_t)
- };
-
- xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
- xfs_trans_log_buf(tp, bp, first, last);
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agblock_t bno;
+ int error;
+
+ bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+ error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+ if (error)
+ return error;
+
+ xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+ XFS_EXTENT_BUSY_SKIP_DISCARD);
+ xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+ xfs_trans_binval(cur->bc_tp, bp);
+ return 0;
}
/*
- * Log keys from a btree block (nonleaf).
+ * Update the longest extent in the AGF
*/
STATIC void
-xfs_alloc_log_keys(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int kfirst, /* index of first key to log */
- int klast) /* index of last key to log */
+xfs_allocbt_update_lastrec(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_rec *rec,
+ int ptr,
+ int reason)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- xfs_alloc_key_t *kp; /* key pointer in btree block */
- int last; /* last byte offset logged */
-
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ struct xfs_perag *pag;
+ __be32 len;
+ int numrecs;
+
+ ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
+
+ switch (reason) {
+ case LASTREC_UPDATE:
+ /*
+ * If this is the last leaf block and it's the last record,
+ * then update the size of the longest extent in the AG.
+ */
+ if (ptr != xfs_btree_get_numrecs(block))
+ return;
+ len = rec->alloc.ar_blockcount;
+ break;
+ case LASTREC_INSREC:
+ if (be32_to_cpu(rec->alloc.ar_blockcount) <=
+ be32_to_cpu(agf->agf_longest))
+ return;
+ len = rec->alloc.ar_blockcount;
+ break;
+ case LASTREC_DELREC:
+ numrecs = xfs_btree_get_numrecs(block);
+ if (ptr <= numrecs)
+ return;
+ ASSERT(ptr == numrecs + 1);
+
+ if (numrecs) {
+ xfs_alloc_rec_t *rrp;
+
+ rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
+ len = rrp->ar_blockcount;
+ } else {
+ len = 0;
+ }
+
+ break;
+ default:
+ ASSERT(0);
+ return;
+ }
+
+ agf->agf_longest = len;
+ pag = xfs_perag_get(cur->bc_mp, seqno);
+ pag->pagf_longest = be32_to_cpu(len);
+ xfs_perag_put(pag);
+ xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
}
-/*
- * Log block pointer fields from a btree block (nonleaf).
- */
-STATIC void
-xfs_alloc_log_ptrs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int pfirst, /* index of first pointer to log */
- int plast) /* index of last pointer to log */
+STATIC int
+xfs_allocbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */
-
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ return cur->bc_mp->m_alloc_mnr[level != 0];
}
-/*
- * Log records from a btree block (leaf).
- */
-STATIC void
-xfs_alloc_log_recs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int rfirst, /* index of first record to log */
- int rlast) /* index of last record to log */
+STATIC int
+xfs_allocbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_alloc_rec_t *rp; /* record pointer for btree block */
-
-
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-#ifdef DEBUG
- {
- xfs_agf_t *agf;
- xfs_alloc_rec_t *p;
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
- ASSERT(be32_to_cpu(p->ar_startblock) +
- be32_to_cpu(p->ar_blockcount) <=
- be32_to_cpu(agf->agf_length));
- }
-#endif
- first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ return cur->bc_mp->m_alloc_mxr[level != 0];
}
-/*
- * Lookup the record. The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
- */
-STATIC int /* error */
-xfs_alloc_lookup(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_lookup_t dir, /* <=, ==, or >= */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
{
- xfs_agblock_t agbno; /* a.g. relative btree block number */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_alloc_block_t *block=NULL; /* current btree block */
- int diff; /* difference for the current key */
- int error; /* error return value */
- int keyno=0; /* current key number */
- int level; /* level in the btree */
- xfs_mount_t *mp; /* file system mount point */
-
- XFS_STATS_INC(xs_abt_lookup);
- /*
- * Get the allocation group header, and the root block number.
- */
- mp = cur->bc_mp;
+ ASSERT(rec->alloc.ar_startblock != 0);
- {
- xfs_agf_t *agf; /* a.g. freespace header */
+ key->alloc.ar_startblock = rec->alloc.ar_startblock;
+ key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
+}
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- agno = be32_to_cpu(agf->agf_seqno);
- agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
- }
- /*
- * Iterate over each level in the btree, starting at the root.
- * For each level above the leaves, find the key we need, based
- * on the lookup record, then follow the corresponding block
- * pointer down to the next level.
- */
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
- xfs_buf_t *bp; /* buffer pointer for btree block */
- xfs_daddr_t d; /* disk address of btree block */
+STATIC void
+xfs_allocbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(key->alloc.ar_startblock != 0);
- /*
- * Get the disk address we're looking for.
- */
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- /*
- * If the old buffer at this level is for a different block,
- * throw it away, otherwise just use it.
- */
- bp = cur->bc_bufs[level];
- if (bp && XFS_BUF_ADDR(bp) != d)
- bp = (xfs_buf_t *)0;
- if (!bp) {
- /*
- * Need to get a new buffer. Read it, then
- * set it in the cursor, releasing the old one.
- */
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
- agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
- return error;
- xfs_btree_setbuf(cur, level, bp);
- /*
- * Point to the btree block, now that we have the buffer
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, level,
- bp)))
- return error;
- } else
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- /*
- * If we already had a key match at a higher level, we know
- * we need to use the first entry in this block.
- */
- if (diff == 0)
- keyno = 1;
- /*
- * Otherwise we need to search this block. Do a binary search.
- */
- else {
- int high; /* high entry number */
- xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
- xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
- int low; /* low entry number */
-
- /*
- * Get a pointer to keys or records.
- */
- if (level > 0)
- kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- else
- krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
- /*
- * Set low and high entry numbers, 1-based.
- */
- low = 1;
- if (!(high = be16_to_cpu(block->bb_numrecs))) {
- /*
- * If the block is empty, the tree must
- * be an empty leaf.
- */
- ASSERT(level == 0 && cur->bc_nlevels == 1);
- cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
- *stat = 0;
- return 0;
- }
- /*
- * Binary search the block.
- */
- while (low <= high) {
- xfs_extlen_t blockcount; /* key value */
- xfs_agblock_t startblock; /* key value */
-
- XFS_STATS_INC(xs_abt_compare);
- /*
- * keyno is average of low and high.
- */
- keyno = (low + high) >> 1;
- /*
- * Get startblock & blockcount.
- */
- if (level > 0) {
- xfs_alloc_key_t *kkp;
-
- kkp = kkbase + keyno - 1;
- startblock = be32_to_cpu(kkp->ar_startblock);
- blockcount = be32_to_cpu(kkp->ar_blockcount);
- } else {
- xfs_alloc_rec_t *krp;
-
- krp = krbase + keyno - 1;
- startblock = be32_to_cpu(krp->ar_startblock);
- blockcount = be32_to_cpu(krp->ar_blockcount);
- }
- /*
- * Compute difference to get next direction.
- */
- if (cur->bc_btnum == XFS_BTNUM_BNO)
- diff = (int)startblock -
- (int)cur->bc_rec.a.ar_startblock;
- else if (!(diff = (int)blockcount -
- (int)cur->bc_rec.a.ar_blockcount))
- diff = (int)startblock -
- (int)cur->bc_rec.a.ar_startblock;
- /*
- * Less than, move right.
- */
- if (diff < 0)
- low = keyno + 1;
- /*
- * Greater than, move left.
- */
- else if (diff > 0)
- high = keyno - 1;
- /*
- * Equal, we're done.
- */
- else
- break;
- }
- }
- /*
- * If there are more levels, set up for the next level
- * by getting the block number and filling in the cursor.
- */
- if (level > 0) {
- /*
- * If we moved left, need the previous key number,
- * unless there isn't one.
- */
- if (diff > 0 && --keyno < 1)
- keyno = 1;
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, agbno, level)))
- return error;
-#endif
- cur->bc_ptrs[level] = keyno;
- }
- }
- /*
- * Done with the search.
- * See if we need to adjust the results.
- */
- if (dir != XFS_LOOKUP_LE && diff < 0) {
- keyno++;
- /*
- * If ge search and we went off the end of the block, but it's
- * not the last block, we're in the wrong block.
- */
- if (dir == XFS_LOOKUP_GE &&
- keyno > be16_to_cpu(block->bb_numrecs) &&
- be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
- int i;
-
- cur->bc_ptrs[0] = keyno;
- if ((error = xfs_alloc_increment(cur, 0, &i)))
- return error;
- XFS_WANT_CORRUPTED_RETURN(i == 1);
- *stat = 1;
- return 0;
- }
- }
- else if (dir == XFS_LOOKUP_LE && diff > 0)
- keyno--;
- cur->bc_ptrs[0] = keyno;
- /*
- * Return if we succeeded or not.
- */
- if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
- *stat = 0;
- else
- *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
- return 0;
+ rec->alloc.ar_startblock = key->alloc.ar_startblock;
+ rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
}
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_alloc_lshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
{
- int error; /* error return value */
-#ifdef DEBUG
- int i; /* loop index */
-#endif
- xfs_alloc_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left neighbor block */
- xfs_alloc_block_t *left; /* left neighbor btree block */
- int nrec; /* new number of left block entries */
- xfs_buf_t *rbp; /* buffer for right (current) block */
- xfs_alloc_block_t *right; /* right (current) btree block */
- xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */
- xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */
- xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */
+ ASSERT(cur->bc_rec.a.ar_startblock != 0);
- /*
- * Set up variables for this block as "right".
- */
- rbp = cur->bc_bufs[level];
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
-#endif
- /*
- * If we've got no left sibling then we can't shift an entry left.
- */
- if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] <= 1) {
- *stat = 0;
- return 0;
- }
- /*
- * Set up the left neighbor as "left".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
- 0, &lbp, XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
- }
- nrec = be16_to_cpu(left->bb_numrecs) + 1;
- /*
- * If non-leaf, copy a key and a ptr to the left block.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* key pointer for left block */
- xfs_alloc_ptr_t *lpp; /* address pointer for left block */
-
- lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- *lkp = *rkp;
- xfs_alloc_log_keys(cur, lbp, nrec, nrec);
- lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
- return error;
-#endif
- *lpp = *rpp; /* INT_: copy */
- xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
- xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
- }
- /*
- * If leaf, copy a record to the left block.
- */
- else {
- xfs_alloc_rec_t *lrp; /* record pointer for left block */
-
- lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- *lrp = *rrp;
- xfs_alloc_log_recs(cur, lbp, nrec, nrec);
- xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
- }
- /*
- * Bump and log left's numrecs, decrement and log right's numrecs.
- */
- be16_add(&left->bb_numrecs, 1);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
- be16_add(&right->bb_numrecs, -1);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Slide the contents of right down one entry.
- */
- if (level > 0) {
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
- level)))
- return error;
- }
-#endif
- memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- } else {
- memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- key.ar_startblock = rrp->ar_startblock;
- key.ar_blockcount = rrp->ar_blockcount;
- rkp = &key;
- }
- /*
- * Update the parent key values of right.
- */
- if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
- return error;
- /*
- * Slide the cursor value left one.
- */
- cur->bc_ptrs[level]--;
- *stat = 1;
- return 0;
+ rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
+ rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
-/*
- * Allocate a new root block, fill it in.
- */
-STATIC int /* error */
-xfs_alloc_newroot(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
{
- int error; /* error return value */
- xfs_agblock_t lbno; /* left block number */
- xfs_buf_t *lbp; /* left btree buffer */
- xfs_alloc_block_t *left; /* left btree block */
- xfs_mount_t *mp; /* mount structure */
- xfs_agblock_t nbno; /* new block number */
- xfs_buf_t *nbp; /* new (root) buffer */
- xfs_alloc_block_t *new; /* new (root) btree block */
- int nptr; /* new value for key index, 1 or 2 */
- xfs_agblock_t rbno; /* right block number */
- xfs_buf_t *rbp; /* right btree buffer */
- xfs_alloc_block_t *right; /* right btree block */
-
- mp = cur->bc_mp;
-
- ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
- /*
- * Get a buffer from the freelist blocks, for the new root.
- */
- if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
- &nbno)))
- return error;
- /*
- * None available, we fail.
- */
- if (nbno == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
- nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
- 0);
- new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
- /*
- * Set the root data in the a.g. freespace structure.
- */
- {
- xfs_agf_t *agf; /* a.g. freespace header */
- xfs_agnumber_t seqno;
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
- be32_add(&agf->agf_levels[cur->bc_btnum], 1);
- seqno = be32_to_cpu(agf->agf_seqno);
- mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_ROOTS | XFS_AGF_LEVELS);
- }
- /*
- * At the previous root level there are now two blocks: the old
- * root, and the new block generated when it was split.
- * We don't know which one the cursor is pointing at, so we
- * set up variables "left" and "right" for each case.
- */
- lbp = cur->bc_bufs[cur->bc_nlevels - 1];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
- return error;
-#endif
- if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
- /*
- * Our block is left, pick up the right block.
- */
- lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
- rbno = be32_to_cpu(left->bb_rightsib);
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, rbno, 0, &rbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right,
- cur->bc_nlevels - 1, rbp)))
- return error;
- nptr = 1;
- } else {
- /*
- * Our block is right, pick up the left block.
- */
- rbp = lbp;
- right = left;
- rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
- lbno = be32_to_cpu(right->bb_leftsib);
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, lbno, 0, &lbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left,
- cur->bc_nlevels - 1, lbp)))
- return error;
- nptr = 2;
- }
- /*
- * Fill in the new block's btree header and log it.
- */
- new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- new->bb_level = cpu_to_be16(cur->bc_nlevels);
- new->bb_numrecs = cpu_to_be16(2);
- new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
- ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
- /*
- * Fill in the key data in the new root.
- */
- {
- xfs_alloc_key_t *kp; /* btree key pointer */
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
- if (be16_to_cpu(left->bb_level) > 0) {
- kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
- kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
- } else {
- xfs_alloc_rec_t *rp; /* btree record pointer */
-
- rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
- kp[0].ar_startblock = rp->ar_startblock;
- kp[0].ar_blockcount = rp->ar_blockcount;
- rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- kp[1].ar_startblock = rp->ar_startblock;
- kp[1].ar_blockcount = rp->ar_blockcount;
- }
- }
- xfs_alloc_log_keys(cur, nbp, 1, 2);
- /*
- * Fill in the pointer data in the new root.
- */
- {
- xfs_alloc_ptr_t *pp; /* btree address pointer */
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
- pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
- pp[0] = cpu_to_be32(lbno);
- pp[1] = cpu_to_be32(rbno);
- }
- xfs_alloc_log_ptrs(cur, nbp, 1, 2);
- /*
- * Fix up the cursor.
- */
- xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
- cur->bc_ptrs[cur->bc_nlevels] = nptr;
- cur->bc_nlevels++;
- *stat = 1;
- return 0;
+ ptr->s = agf->agf_roots[cur->bc_btnum];
}
-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_alloc_rshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
+STATIC __int64_t
+xfs_allocbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
{
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left (current) block */
- xfs_alloc_block_t *left; /* left (current) btree block */
- xfs_buf_t *rbp; /* buffer for right neighbor block */
- xfs_alloc_block_t *right; /* right neighbor btree block */
- xfs_alloc_key_t *rkp; /* key pointer for right block */
- xfs_btree_cur_t *tcur; /* temporary cursor */
+ xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
+ xfs_alloc_key_t *kp = &key->alloc;
+ __int64_t diff;
- /*
- * Set up variables for this block as "left".
- */
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * If we've got no right sibling then we can't shift an entry right.
- */
- if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
- *stat = 0;
- return 0;
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) -
+ rec->ar_startblock;
}
- /*
- * Set up the right neighbor as "right".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
- 0, &rbp, XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
- }
- /*
- * Make a hole at the start of the right neighbor block, then
- * copy the last left block entry to the hole.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* key pointer for left block */
- xfs_alloc_ptr_t *lpp; /* address pointer for left block */
- xfs_alloc_ptr_t *rpp; /* address pointer for right block */
-
- lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
- return error;
-#endif
- *rkp = *lkp; /* INT_: copy */
- *rpp = *lpp; /* INT_: copy */
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
- } else {
- xfs_alloc_rec_t *lrp; /* record pointer for left block */
- xfs_alloc_rec_t *rrp; /* record pointer for right block */
-
- lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- *rrp = *lrp;
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- key.ar_startblock = rrp->ar_startblock;
- key.ar_blockcount = rrp->ar_blockcount;
- rkp = &key;
- xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
- }
- /*
- * Decrement and log left's numrecs, bump and log right's numrecs.
- */
- be16_add(&left->bb_numrecs, -1);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
- be16_add(&right->bb_numrecs, 1);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Using a temporary cursor, update the parent key values of the
- * block on the right.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_increment(tcur, level, &i)) ||
- (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
- goto error0;
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- *stat = 1;
- return 0;
-error0:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
-}
-/*
- * Split cur/level block in half.
- * Return new block number and its first record (to be inserted into parent).
- */
-STATIC int /* error */
-xfs_alloc_split(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to split */
- xfs_agblock_t *bnop, /* output: block number allocated */
- xfs_alloc_key_t *keyp, /* output: first key of new block */
- xfs_btree_cur_t **curp, /* output: new cursor */
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* loop index/record number */
- xfs_agblock_t lbno; /* left (current) block number */
- xfs_buf_t *lbp; /* buffer for left block */
- xfs_alloc_block_t *left; /* left (current) btree block */
- xfs_agblock_t rbno; /* right (new) block number */
- xfs_buf_t *rbp; /* buffer for right block */
- xfs_alloc_block_t *right; /* right (new) btree block */
+ diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+ if (diff)
+ return diff;
- /*
- * Allocate the new block from the freelist.
- * If we can't do it, we're toast. Give up.
- */
- if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
- &rbno)))
- return error;
- if (rbno == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
- rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
- rbno, 0);
- /*
- * Set up the new block as "right".
- */
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- /*
- * "Left" is the current (according to the cursor) block.
- */
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * Fill in the btree header for the new block.
- */
- right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- right->bb_level = left->bb_level;
- right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
- /*
- * Make sure that if there's an odd number of entries now, that
- * each new block will have the same number of entries.
- */
- if ((be16_to_cpu(left->bb_numrecs) & 1) &&
- cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
- be16_add(&right->bb_numrecs, 1);
- i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
- /*
- * For non-leaf blocks, copy keys and addresses over to the new block.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* left btree key pointer */
- xfs_alloc_ptr_t *lpp; /* left btree address pointer */
- xfs_alloc_key_t *rkp; /* right btree key pointer */
- xfs_alloc_ptr_t *rpp; /* right btree address pointer */
-
- lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
- return error;
- }
-#endif
- memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *keyp = *rkp;
- }
- /*
- * For leaf blocks, copy records over to the new block.
- */
- else {
- xfs_alloc_rec_t *lrp; /* left btree record pointer */
- xfs_alloc_rec_t *rrp; /* right btree record pointer */
-
- lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- keyp->ar_startblock = rrp->ar_startblock;
- keyp->ar_blockcount = rrp->ar_blockcount;
- }
- /*
- * Find the left block number by looking in the buffer.
- * Adjust numrecs, sibling pointers.
- */
- lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
- be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
- right->bb_rightsib = left->bb_rightsib;
- left->bb_rightsib = cpu_to_be32(rbno);
- right->bb_leftsib = cpu_to_be32(lbno);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there's a block to the new block's right, make that block
- * point back to right instead of to left.
- */
- if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
- xfs_alloc_block_t *rrblock; /* rr btree block */
- xfs_buf_t *rrbp; /* buffer for rrblock */
-
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
- &rrbp, XFS_ALLOC_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(rbno);
- xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * If the cursor is really in the right block, move it there.
- * If it's just pointing past the last entry in left, then we'll
- * insert there, so don't change anything in that case.
- */
- if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
- xfs_btree_setbuf(cur, level, rbp);
- cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
- }
- /*
- * If there are more levels, we'll need another cursor which refers to
- * the right block, no matter where this cursor was.
- */
- if (level + 1 < cur->bc_nlevels) {
- if ((error = xfs_btree_dup_cursor(cur, curp)))
- return error;
- (*curp)->bc_ptrs[level + 1]++;
- }
- *bnop = rbno;
- *stat = 1;
- return 0;
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
-/*
- * Update keys at all levels from here to the root along the cursor's path.
- */
-STATIC int /* error */
-xfs_alloc_updkey(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_alloc_key_t *keyp, /* new key value to update to */
- int level) /* starting level for update */
+static bool
+xfs_allocbt_verify(
+ struct xfs_buf *bp)
{
- int ptr; /* index of key in block */
-
- /*
- * Go up the tree from this level toward the root.
- * At each level, update the key value to the value input.
- * Stop when we reach a level where the cursor isn't pointing
- * at the first entry in the block.
- */
- for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
- xfs_alloc_block_t *block; /* btree block */
- xfs_buf_t *bp; /* buffer for block */
-#ifdef DEBUG
- int error; /* error return value */
-#endif
- xfs_alloc_key_t *kp; /* ptr to btree block keys */
-
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
- *kp = *keyp;
- xfs_alloc_log_keys(cur, bp, ptr, ptr);
- }
- return 0;
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+ unsigned int level;
+
+ /*
+ * magic number and level verification
+ *
+ * During growfs operations, we can't verify the exact level or owner as
+ * the perag is not fully initialised and hence not attached to the
+ * buffer. In this case, check against the maximum tree depth.
+ *
+ * Similarly, during log recovery we will have a perag structure
+ * attached, but the agf information will not yet have been initialised
+ * from the on disk AGF. Again, we can only check against maximum limits
+ * in this case.
+ */
+ level = be16_to_cpu(block->bb_level);
+ switch (block->bb_magic) {
+ case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag &&
+ be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ /* fall through */
+ case cpu_to_be32(XFS_ABTB_MAGIC):
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
+ break;
+ case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag &&
+ be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ /* fall through */
+ case cpu_to_be32(XFS_ABTC_MAGIC):
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
+ return false;
+
+ /* sibling pointer verification */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
}
-/*
- * Externally visible routines.
- */
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_alloc_decrement(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+static void
+xfs_allocbt_read_verify(
+ struct xfs_buf *bp)
{
- xfs_alloc_block_t *block; /* btree block */
- int error; /* error return value */
- int lev; /* btree level */
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_allocbt_verify(bp))
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the left at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
- /*
- * Decrement the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (--cur->bc_ptrs[level] > 0) {
- *stat = 1;
- return 0;
- }
- /*
- * Get a pointer to the btree block.
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level,
- cur->bc_bufs[level])))
- return error;
-#endif
- /*
- * If we just went off the left edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree decrementing pointers.
- * Stop when we don't go off the left edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- if (--cur->bc_ptrs[lev] > 0)
- break;
- /*
- * Read-ahead the left block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
- xfs_buf_t *bp; /* buffer pointer for block */
-
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
}
- *stat = 1;
- return 0;
}
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-int /* error */
-xfs_alloc_delete(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+static void
+xfs_allocbt_write_verify(
+ struct xfs_buf *bp)
{
- int error; /* error return value */
- int i; /* result code */
- int level; /* btree level */
-
- /*
- * Go up the tree, starting at leaf level.
- * If 2 is returned then a join was done; go to the next level.
- * Otherwise we are done.
- */
- for (level = 0, i = 2; i == 2; level++) {
- if ((error = xfs_alloc_delrec(cur, level, &i)))
- return error;
+ if (!xfs_allocbt_verify(bp)) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
- if (i == 0) {
- for (level = 1; level < cur->bc_nlevels; level++) {
- if (cur->bc_ptrs[level] == 0) {
- if ((error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- break;
- }
- }
- }
- *stat = i;
- return 0;
+ xfs_btree_sblock_calc_crc(bp);
+
}
-/*
- * Get the data from the pointed-to record.
- */
-int /* error */
-xfs_alloc_get_rec(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t *bno, /* output: starting block of extent */
- xfs_extlen_t *len, /* output: length of extent */
- int *stat) /* output: success/failure */
-{
- xfs_alloc_block_t *block; /* btree block */
-#ifdef DEBUG
- int error; /* error return value */
-#endif
- int ptr; /* record number */
+const struct xfs_buf_ops xfs_allocbt_buf_ops = {
+ .verify_read = xfs_allocbt_read_verify,
+ .verify_write = xfs_allocbt_write_verify,
+};
- ptr = cur->bc_ptrs[0];
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
- return error;
-#endif
- /*
- * Off the right end or left end, return failure.
- */
- if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
- *stat = 0;
- return 0;
- }
- /*
- * Point to the record and extract its data.
- */
- {
- xfs_alloc_rec_t *rec; /* record data */
- rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- *bno = be32_to_cpu(rec->ar_startblock);
- *len = be32_to_cpu(rec->ar_blockcount);
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_allocbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock);
+ } else {
+ return be32_to_cpu(k1->alloc.ar_blockcount) <
+ be32_to_cpu(k2->alloc.ar_blockcount) ||
+ (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+ be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock));
}
- *stat = 1;
- return 0;
}
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_alloc_increment(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+STATIC int
+xfs_allocbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
{
- xfs_alloc_block_t *block; /* btree block */
- xfs_buf_t *bp; /* tree block buffer */
- int error; /* error return value */
- int lev; /* btree level */
-
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the right at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
- /*
- * Get a pointer to the btree block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- /*
- * Increment the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
- *stat = 1;
- return 0;
- }
- /*
- * If we just went off the right edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree incrementing pointers.
- * Stop when we don't go off the right edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- bp = cur->bc_bufs[lev];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
-#endif
- if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
- break;
- /*
- * Read-ahead the right block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
-
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = 1;
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return be32_to_cpu(r1->alloc.ar_startblock) +
+ be32_to_cpu(r1->alloc.ar_blockcount) <=
+ be32_to_cpu(r2->alloc.ar_startblock);
+ } else {
+ return be32_to_cpu(r1->alloc.ar_blockcount) <
+ be32_to_cpu(r2->alloc.ar_blockcount) ||
+ (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+ be32_to_cpu(r1->alloc.ar_startblock) <
+ be32_to_cpu(r2->alloc.ar_startblock));
}
- *stat = 1;
- return 0;
}
+#endif /* DEBUG || XFS_WARN */
+
+static const struct xfs_btree_ops xfs_allocbt_ops = {
+ .rec_len = sizeof(xfs_alloc_rec_t),
+ .key_len = sizeof(xfs_alloc_key_t),
+
+ .dup_cursor = xfs_allocbt_dup_cursor,
+ .set_root = xfs_allocbt_set_root,
+ .alloc_block = xfs_allocbt_alloc_block,
+ .free_block = xfs_allocbt_free_block,
+ .update_lastrec = xfs_allocbt_update_lastrec,
+ .get_minrecs = xfs_allocbt_get_minrecs,
+ .get_maxrecs = xfs_allocbt_get_maxrecs,
+ .init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_rec_from_key = xfs_allocbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
+ .key_diff = xfs_allocbt_key_diff,
+ .buf_ops = &xfs_allocbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_allocbt_keys_inorder,
+ .recs_inorder = xfs_allocbt_recs_inorder,
+#endif
+};
/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
+ * Allocate a new allocation btree cursor.
*/
-int /* error */
-xfs_alloc_insert(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+struct xfs_btree_cur * /* new alloc btree cursor */
+xfs_allocbt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_buf *agbp, /* buffer for agf structure */
+ xfs_agnumber_t agno, /* allocation group number */
+ xfs_btnum_t btnum) /* btree identifier */
{
- int error; /* error return value */
- int i; /* result value, 0 for failure */
- int level; /* current level number in btree */
- xfs_agblock_t nbno; /* new block number (split result) */
- xfs_btree_cur_t *ncur; /* new cursor (split result) */
- xfs_alloc_rec_t nrec; /* record being inserted this level */
- xfs_btree_cur_t *pcur; /* previous level's cursor */
-
- level = 0;
- nbno = NULLAGBLOCK;
- nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
- nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
- ncur = (xfs_btree_cur_t *)0;
- pcur = cur;
- /*
- * Loop going up the tree, starting at the leaf level.
- * Stop when we don't get a split block, that must mean that
- * the insert is finished with this level.
- */
- do {
- /*
- * Insert nrec/nbno into this level of the tree.
- * Note if we fail, nbno will be null.
- */
- if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
- &i))) {
- if (pcur != cur)
- xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- return error;
- }
- /*
- * See if the cursor we just used is trash.
- * Can't trash the caller's cursor, but otherwise we should
- * if ncur is a new cursor or we're about to be done.
- */
- if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
- cur->bc_nlevels = pcur->bc_nlevels;
- xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
- }
- /*
- * If we got a new cursor, switch to it.
- */
- if (ncur) {
- pcur = ncur;
- ncur = (xfs_btree_cur_t *)0;
- }
- } while (nbno != NULLAGBLOCK);
- *stat = i;
- return 0;
-}
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_eq(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
+ ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_ge(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
-}
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_le(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_btnum = btnum;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
+ cur->bc_ops = &xfs_allocbt_ops;
+
+ if (btnum == XFS_BTNUM_CNT) {
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
+ cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+ } else {
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+ }
+
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+ return cur;
}
/*
- * Update the record referred to by cur, to the value given by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
+ * Calculate number of records in an alloc btree block.
*/
-int /* error */
-xfs_alloc_update(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len) /* length of extent */
+int
+xfs_allocbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
{
- xfs_alloc_block_t *block; /* btree block to update */
- int error; /* error return value */
- int ptr; /* current record number (updating) */
+ blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
- ASSERT(len > 0);
- /*
- * Pick up the a.g. freelist struct and the current block.
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
- return error;
-#endif
- /*
- * Get the address of the rec to be updated.
- */
- ptr = cur->bc_ptrs[0];
- {
- xfs_alloc_rec_t *rp; /* pointer to updated record */
-
- rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- /*
- * Fill in the new contents and log them.
- */
- rp->ar_startblock = cpu_to_be32(bno);
- rp->ar_blockcount = cpu_to_be32(len);
- xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
- }
- /*
- * If it's the by-size btree and it's the last leaf block and
- * it's the last record... then update the size of the longest
- * extent in the a.g., which we cache in the a.g. freelist header.
- */
- if (cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- ptr == be16_to_cpu(block->bb_numrecs)) {
- xfs_agf_t *agf; /* a.g. freespace header */
- xfs_agnumber_t seqno;
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- seqno = be32_to_cpu(agf->agf_seqno);
- cur->bc_mp->m_perag[seqno].pagf_longest = len;
- agf->agf_longest = cpu_to_be32(len);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
- }
- /*
- * Updating first record in leaf. Pass new key value up to our parent.
- */
- if (ptr == 1) {
- xfs_alloc_key_t key; /* key containing [bno, len] */
-
- key.ar_startblock = cpu_to_be32(bno);
- key.ar_blockcount = cpu_to_be32(len);
- if ((error = xfs_alloc_updkey(cur, &key, 1)))
- return error;
- }
- return 0;
+ if (leaf)
+ return blocklen / sizeof(xfs_alloc_rec_t);
+ return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index bce81c7a4fd..45e189e7e81 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -24,136 +24,42 @@
struct xfs_buf;
struct xfs_btree_cur;
-struct xfs_btree_sblock;
struct xfs_mount;
/*
- * There are two on-disk btrees, one sorted by blockno and one sorted
- * by blockcount and blockno. All blocks look the same to make the code
- * simpler; if we have time later, we'll make the optimizations.
+ * Btree block header size depends on a superblock flag.
*/
-#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
-#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
-
-/*
- * Data record/key structure
- */
-typedef struct xfs_alloc_rec {
- __be32 ar_startblock; /* starting block number */
- __be32 ar_blockcount; /* count of free blocks */
-} xfs_alloc_rec_t, xfs_alloc_key_t;
-
-typedef struct xfs_alloc_rec_incore {
- xfs_agblock_t ar_startblock; /* starting block number */
- xfs_extlen_t ar_blockcount; /* count of free blocks */
-} xfs_alloc_rec_incore_t;
-
-/* btree pointer type */
-typedef __be32 xfs_alloc_ptr_t;
-/* btree block header type */
-typedef struct xfs_btree_sblock xfs_alloc_block_t;
-
-#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp))
-
-/*
- * Real block structures have a size equal to the disk block size.
- */
-#define XFS_ALLOC_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
-#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
-#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
-
-/*
- * Minimum and maximum blocksize and sectorsize.
- * The blocksize upper limit is pretty much arbitrary.
- * The sectorsize upper limit is due to sizeof(sb_sectsize).
- */
-#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */
-#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */
-#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG)
-#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG)
-#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */
-#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */
-#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)
-#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG)
-
-/*
- * Block numbers in the AG:
- * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
- */
-#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
-#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
+#define XFS_ALLOC_BLOCK_LEN(mp) \
+ (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+ XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
/*
* Record, key, and pointer address macros for btree blocks.
- */
-#define XFS_ALLOC_REC_ADDR(bb,i,cur) \
- XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, \
- bb, i, XFS_ALLOC_BLOCK_MAXRECS(0, cur))
-
-#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
- XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \
- bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
-
-#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
- XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \
- bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Get the data from the pointed-to record.
- */
-extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
- xfs_extlen_t *len, int *stat);
-
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
- */
-extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
-
-/*
- * Update the record referred to by cur, to the value given by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len);
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_ALLOC_REC_ADDR(mp, block, index) \
+ ((xfs_alloc_rec_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ (((index) - 1) * sizeof(xfs_alloc_rec_t))))
+
+#define XFS_ALLOC_KEY_ADDR(mp, block, index) \
+ ((xfs_alloc_key_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_alloc_key_t)))
+
+#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \
+ ((xfs_alloc_ptr_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ (maxrecs) * sizeof(xfs_alloc_key_t) + \
+ ((index) - 1) * sizeof(xfs_alloc_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
+ struct xfs_trans *, struct xfs_buf *,
+ xfs_agnumber_t, xfs_btnum_t);
+extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
new file mode 100644
index 00000000000..faaf716e208
--- /dev/null
+++ b/fs/xfs/xfs_aops.c
@@ -0,0 +1,1770 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_iomap.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include <linux/aio.h>
+#include <linux/gfp.h>
+#include <linux/mpage.h>
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+
+void
+xfs_count_page_state(
+ struct page *page,
+ int *delalloc,
+ int *unwritten)
+{
+ struct buffer_head *bh, *head;
+
+ *delalloc = *unwritten = 0;
+
+ bh = head = page_buffers(page);
+ do {
+ if (buffer_unwritten(bh))
+ (*unwritten) = 1;
+ else if (buffer_delay(bh))
+ (*delalloc) = 1;
+ } while ((bh = bh->b_this_page) != head);
+}
+
+STATIC struct block_device *
+xfs_find_bdev_for_inode(
+ struct inode *inode)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (XFS_IS_REALTIME_INODE(ip))
+ return mp->m_rtdev_targp->bt_bdev;
+ else
+ return mp->m_ddev_targp->bt_bdev;
+}
+
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory. Do not use the ioend after this.
+ */
+STATIC void
+xfs_destroy_ioend(
+ xfs_ioend_t *ioend)
+{
+ struct buffer_head *bh, *next;
+
+ for (bh = ioend->io_buffer_head; bh; bh = next) {
+ next = bh->b_private;
+ bh->b_end_io(bh, !ioend->io_error);
+ }
+
+ mempool_free(ioend, xfs_ioend_pool);
+}
+
+/*
+ * Fast and loose check if this write could update the on-disk inode size.
+ */
+static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
+{
+ return ioend->io_offset + ioend->io_size >
+ XFS_I(ioend->io_inode)->i_d.di_size;
+}
+
+STATIC int
+xfs_setfilesize_trans_alloc(
+ struct xfs_ioend *ioend)
+{
+ struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ ioend->io_append_trans = tp;
+
+ /*
+ * We may pass freeze protection with a transaction. So tell lockdep
+ * we released it.
+ */
+ rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+ 1, _THIS_IP_);
+ /*
+ * We hand off the transaction to the completion thread now, so
+ * clear the flag here.
+ */
+ current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ return 0;
+}
+
+/*
+ * Update on-disk file size now that data has been written to disk.
+ */
+STATIC int
+xfs_setfilesize(
+ struct xfs_ioend *ioend)
+{
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ struct xfs_trans *tp = ioend->io_append_trans;
+ xfs_fsize_t isize;
+
+ /*
+ * The transaction may have been allocated in the I/O submission thread,
+ * thus we need to mark ourselves as being in a transaction manually.
+ * Similarly for freeze protection.
+ */
+ current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+ 0, 1, _THIS_IP_);
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
+ if (!isize) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_cancel(tp, 0);
+ return 0;
+ }
+
+ trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+
+ ip->i_d.di_size = isize;
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ return xfs_trans_commit(tp, 0);
+}
+
+/*
+ * Schedule IO completion handling on the final put of an ioend.
+ *
+ * If there is no work to do we might as well call it a day and free the
+ * ioend right now.
+ */
+STATIC void
+xfs_finish_ioend(
+ struct xfs_ioend *ioend)
+{
+ if (atomic_dec_and_test(&ioend->io_remaining)) {
+ struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
+
+ if (ioend->io_type == XFS_IO_UNWRITTEN)
+ queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
+ else if (ioend->io_append_trans ||
+ (ioend->io_isdirect && xfs_ioend_is_append(ioend)))
+ queue_work(mp->m_data_workqueue, &ioend->io_work);
+ else
+ xfs_destroy_ioend(ioend);
+ }
+}
+
+/*
+ * IO write completion.
+ */
+STATIC void
+xfs_end_io(
+ struct work_struct *work)
+{
+ xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ int error = 0;
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ ioend->io_error = -EIO;
+ goto done;
+ }
+ if (ioend->io_error)
+ goto done;
+
+ /*
+ * For unwritten extents we need to issue transactions to convert a
+ * range to normal written extents after the data I/O has finished.
+ */
+ if (ioend->io_type == XFS_IO_UNWRITTEN) {
+ error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+ ioend->io_size);
+ } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) {
+ /*
+ * For direct I/O we do not know if we need to allocate blocks
+ * or not so we can't preallocate an append transaction as that
+ * results in nested reservations and log space deadlocks. Hence
+ * allocate the transaction here. While this is sub-optimal and
+ * can block IO completion for some time, we're stuck with doing
+ * it this way until we can pass the ioend to the direct IO
+ * allocation callbacks and avoid nesting that way.
+ */
+ error = xfs_setfilesize_trans_alloc(ioend);
+ if (error)
+ goto done;
+ error = xfs_setfilesize(ioend);
+ } else if (ioend->io_append_trans) {
+ error = xfs_setfilesize(ioend);
+ } else {
+ ASSERT(!xfs_ioend_is_append(ioend));
+ }
+
+done:
+ if (error)
+ ioend->io_error = -error;
+ xfs_destroy_ioend(ioend);
+}
+
+/*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+ struct xfs_ioend *ioend)
+{
+ if (atomic_dec_and_test(&ioend->io_remaining))
+ xfs_end_io(&ioend->io_work);
+}
+
+/*
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
+ */
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+ struct inode *inode,
+ unsigned int type)
+{
+ xfs_ioend_t *ioend;
+
+ ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
+
+ /*
+ * Set the count to 1 initially, which will prevent an I/O
+ * completion callback that happens before we have started
+ * all the I/O from calling the completion routine too early.
+ */
+ atomic_set(&ioend->io_remaining, 1);
+ ioend->io_isdirect = 0;
+ ioend->io_error = 0;
+ ioend->io_list = NULL;
+ ioend->io_type = type;
+ ioend->io_inode = inode;
+ ioend->io_buffer_head = NULL;
+ ioend->io_buffer_tail = NULL;
+ ioend->io_offset = 0;
+ ioend->io_size = 0;
+ ioend->io_append_trans = NULL;
+
+ INIT_WORK(&ioend->io_work, xfs_end_io);
+ return ioend;
+}
+
+STATIC int
+xfs_map_blocks(
+ struct inode *inode,
+ loff_t offset,
+ struct xfs_bmbt_irec *imap,
+ int type,
+ int nonblocking)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t count = 1 << inode->i_blkbits;
+ xfs_fileoff_t offset_fsb, end_fsb;
+ int error = 0;
+ int bmapi_flags = XFS_BMAPI_ENTIRE;
+ int nimaps = 1;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -XFS_ERROR(EIO);
+
+ if (type == XFS_IO_UNWRITTEN)
+ bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+ if (nonblocking)
+ return -XFS_ERROR(EAGAIN);
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ }
+
+ ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+ (ip->i_df.if_flags & XFS_IFEXTENTS));
+ ASSERT(offset <= mp->m_super->s_maxbytes);
+
+ if (offset + count > mp->m_super->s_maxbytes)
+ count = mp->m_super->s_maxbytes - offset;
+ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+ imap, &nimaps, bmapi_flags);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ if (error)
+ return -XFS_ERROR(error);
+
+ if (type == XFS_IO_DELALLOC &&
+ (!nimaps || isnullstartblock(imap->br_startblock))) {
+ error = xfs_iomap_write_allocate(ip, offset, imap);
+ if (!error)
+ trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+ return -XFS_ERROR(error);
+ }
+
+#ifdef DEBUG
+ if (type == XFS_IO_UNWRITTEN) {
+ ASSERT(nimaps);
+ ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+ ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+ }
+#endif
+ if (nimaps)
+ trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+ return 0;
+}
+
+STATIC int
+xfs_imap_valid(
+ struct inode *inode,
+ struct xfs_bmbt_irec *imap,
+ xfs_off_t offset)
+{
+ offset >>= inode->i_blkbits;
+
+ return offset >= imap->br_startoff &&
+ offset < imap->br_startoff + imap->br_blockcount;
+}
+
+/*
+ * BIO completion handler for buffered IO.
+ */
+STATIC void
+xfs_end_bio(
+ struct bio *bio,
+ int error)
+{
+ xfs_ioend_t *ioend = bio->bi_private;
+
+ ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+ ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
+
+ /* Toss bio and pass work off to an xfsdatad thread */
+ bio->bi_private = NULL;
+ bio->bi_end_io = NULL;
+ bio_put(bio);
+
+ xfs_finish_ioend(ioend);
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+ struct writeback_control *wbc,
+ xfs_ioend_t *ioend,
+ struct bio *bio)
+{
+ atomic_inc(&ioend->io_remaining);
+ bio->bi_private = ioend;
+ bio->bi_end_io = xfs_end_bio;
+ submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+ struct buffer_head *bh)
+{
+ int nvecs = bio_get_nr_vecs(bh->b_bdev);
+ struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
+
+ ASSERT(bio->bi_private == NULL);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_bdev = bh->b_bdev;
+ return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+ struct buffer_head *bh)
+{
+ ASSERT(buffer_mapped(bh));
+ ASSERT(buffer_locked(bh));
+ ASSERT(!buffer_delay(bh));
+ ASSERT(!buffer_unwritten(bh));
+
+ mark_buffer_async_write(bh);
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
+ struct page *page,
+ int clear_dirty,
+ int buffers)
+{
+ ASSERT(PageLocked(page));
+ ASSERT(!PageWriteback(page));
+ if (clear_dirty)
+ clear_page_dirty_for_io(page);
+ set_page_writeback(page);
+ unlock_page(page);
+ /* If no buffers on the page are to be written, finish it here */
+ if (!buffers)
+ end_page_writeback(page);
+}
+
+static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+ return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we go, then we can end up with a page that only has buffers
+ * marked async write and I/O completion can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * buffer_heads, and then submit them for I/O on the second pass.
+ *
+ * If @fail is non-zero, it means that we have a situation where some part of
+ * the submission process has failed after we have marked pages for writeback
+ * and unlocked them. In this situation, we need to fail the ioend chain rather
+ * than submit it to IO. This typically only happens on a filesystem shutdown.
+ */
+STATIC void
+xfs_submit_ioend(
+ struct writeback_control *wbc,
+ xfs_ioend_t *ioend,
+ int fail)
+{
+ xfs_ioend_t *head = ioend;
+ xfs_ioend_t *next;
+ struct buffer_head *bh;
+ struct bio *bio;
+ sector_t lastblock = 0;
+
+ /* Pass 1 - start writeback */
+ do {
+ next = ioend->io_list;
+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
+ xfs_start_buffer_writeback(bh);
+ } while ((ioend = next) != NULL);
+
+ /* Pass 2 - submit I/O */
+ ioend = head;
+ do {
+ next = ioend->io_list;
+ bio = NULL;
+
+ /*
+ * If we are failing the IO now, just mark the ioend with an
+ * error and finish it. This will run IO completion immediately
+ * as there is only one reference to the ioend at this point in
+ * time.
+ */
+ if (fail) {
+ ioend->io_error = -fail;
+ xfs_finish_ioend(ioend);
+ continue;
+ }
+
+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+
+ if (!bio) {
+ retry:
+ bio = xfs_alloc_ioend_bio(bh);
+ } else if (bh->b_blocknr != lastblock + 1) {
+ xfs_submit_ioend_bio(wbc, ioend, bio);
+ goto retry;
+ }
+
+ if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
+ xfs_submit_ioend_bio(wbc, ioend, bio);
+ goto retry;
+ }
+
+ lastblock = bh->b_blocknr;
+ }
+ if (bio)
+ xfs_submit_ioend_bio(wbc, ioend, bio);
+ xfs_finish_ioend(ioend);
+ } while ((ioend = next) != NULL);
+}
+
+/*
+ * Back out a chain of ioends that will not be submitted.
+ *
+ * For every ioend on the io_list chain, each attached buffer head has its
+ * async-write flag cleared and is unlocked, and then the ioend itself is
+ * returned to xfs_ioend_pool. Only ever called for the initial page in a
+ * writepage request, so only ever one page.
+ *
+ * @ioend and the buffer list of every ioend must be non-empty.
+ */
+STATIC void
+xfs_cancel_ioend(
+	xfs_ioend_t		*ioend)
+{
+	for (;;) {
+		xfs_ioend_t		*following = ioend->io_list;
+		struct buffer_head	*cur = ioend->io_buffer_head;
+
+		for (;;) {
+			/* Read the link before the buffer is unlocked. */
+			struct buffer_head	*after = cur->b_private;
+
+			clear_buffer_async_write(cur);
+			unlock_buffer(cur);
+			if (!after)
+				break;
+			cur = after;
+		}
+
+		mempool_free(ioend, xfs_ioend_pool);
+		if (!following)
+			break;
+		ioend = following;
+	}
+}
+
+/*
+ * Attach buffer head @bh to an I/O completion structure.
+ *
+ * If *@result already holds an ioend of the same @type and the caller did not
+ * request a new one via @need_ioend, @bh is appended to that ioend's buffer
+ * list. Otherwise a new ioend starting at @offset is allocated, linked after
+ * the previous one (if any) through io_list, and stored in *@result.
+ *
+ * In both cases @bh becomes the tail of the buffer list and the ioend's
+ * io_size grows by the size of @bh.
+ */
+STATIC void
+xfs_add_to_ioend(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	xfs_off_t		offset,
+	unsigned int		type,
+	xfs_ioend_t		**result,
+	int			need_ioend)
+{
+	xfs_ioend_t		*cur = *result;
+
+	if (cur && !need_ioend && type == cur->io_type) {
+		/* Same kind of I/O: extend the current ioend. */
+		cur->io_buffer_tail->b_private = bh;
+		cur->io_buffer_tail = bh;
+	} else {
+		xfs_ioend_t	*fresh = xfs_alloc_ioend(inode, type);
+
+		fresh->io_offset = offset;
+		fresh->io_buffer_head = bh;
+		fresh->io_buffer_tail = bh;
+		if (cur)
+			cur->io_list = fresh;
+		*result = fresh;
+		cur = fresh;
+	}
+
+	/* @bh is now the last buffer on the list. */
+	bh->b_private = NULL;
+	cur->io_size += bh->b_size;
+}
+
+/*
+ * Compute the block number backing file @offset inside the extent described
+ * by @imap, store it in @bh->b_blocknr and mark @bh as mapped.
+ *
+ * @imap must describe a real extent: neither a hole nor a delayed
+ * allocation.
+ */
+STATIC void
+xfs_map_buffer(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_off_t		extent_start = XFS_FSB_TO_B(mp, imap->br_startoff);
+	xfs_daddr_t		extent_daddr = xfs_fsb_to_db(ip, imap->br_startblock);
+	sector_t		bn;
+
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+	/*
+	 * Scale the extent's start address down by (i_blkbits - BBSHIFT) and
+	 * add the number of whole blocks @offset lies past the extent start.
+	 */
+	bn = (extent_daddr >> (inode->i_blkbits - BBSHIFT)) +
+	     ((offset - extent_start) >> inode->i_blkbits);
+
+	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
+
+	bh->b_blocknr = bn;
+	set_buffer_mapped(bh);
+}
+
+/*
+ * Map @bh to the disk block backing @offset within @imap, and drop the
+ * delalloc and unwritten state from the buffer so that it is left as a
+ * plain mapped buffer.
+ *
+ * @imap must describe a real extent: neither a hole nor a delayed
+ * allocation.
+ */
+STATIC void
+xfs_map_at_offset(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
+{
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+	xfs_map_buffer(inode, bh, imap, offset);
+
+	/* Mapped, and no longer delayed-allocation or unwritten. */
+	set_buffer_mapped(bh);
+	clear_buffer_unwritten(bh);
+	clear_buffer_delay(bh);
+}
+
+/*
+ * Report whether @page holds a buffer whose state corresponds to I/O @type.
+ *
+ * A buffer counts as XFS_IO_UNWRITTEN if it is unwritten, as XFS_IO_DELALLOC
+ * if it is delayed-allocation, and as XFS_IO_OVERWRITE if it is both dirty
+ * and mapped; the states are tested in that order and the first one that
+ * applies decides the buffer's classification.
+ *
+ * If @check_all_buffers is true every buffer on the page is examined,
+ * otherwise only the first one is. Pages under writeback, without a mapping
+ * or without buffers never match.
+ */
+STATIC bool
+xfs_check_page_type(
+	struct page		*page,
+	unsigned int		type,
+	bool			check_all_buffers)
+{
+	struct buffer_head	*first;
+	struct buffer_head	*cur;
+
+	if (PageWriteback(page) || !page->mapping || !page_has_buffers(page))
+		return false;
+
+	first = page_buffers(page);
+	cur = first;
+	for (;;) {
+		bool	match;
+
+		if (buffer_unwritten(cur))
+			match = (type == XFS_IO_UNWRITTEN);
+		else if (buffer_delay(cur))
+			match = (type == XFS_IO_DELALLOC);
+		else
+			match = buffer_dirty(cur) && buffer_mapped(cur) &&
+				type == XFS_IO_OVERWRITE;
+
+		if (match)
+			return true;
+
+		/* The caller only cares about the first buffer. */
+		if (!check_all_buffers)
+			break;
+
+		cur = cur->b_this_page;
+		if (cur == first)
+			break;
+	}
+
+	return false;
+}
+
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * Except for the original page of a writepage, this is called on
+ * delalloc/unwritten pages only; for the original page it is possible
+ * that the page has no mapping at all.
+ *
+ * @page is only processed if its index equals @tindex, it can be locked
+ * without blocking, it is not under writeback, it belongs to @inode's mapping
+ * and its first buffer matches the type of the current ioend in *@ioendp.
+ * Buffers that can be written are added to the ioend chain via *@ioendp.
+ *
+ * Returns non-zero when the caller should stop processing further pages:
+ * 1 if the page was rejected, otherwise the value of the local "done" flag.
+ */
+STATIC int
+xfs_convert_page(
+ struct inode *inode,
+ struct page *page,
+ loff_t tindex,
+ struct xfs_bmbt_irec *imap,
+ xfs_ioend_t **ioendp,
+ struct writeback_control *wbc)
+{
+ struct buffer_head *bh, *head;
+ xfs_off_t end_offset;
+ unsigned long p_offset;
+ unsigned int type;
+ int len, page_dirty;
+ int count = 0, done = 0, uptodate = 1;
+ xfs_off_t offset = page_offset(page);
+
+ /* The first two checks run before the page is locked. */
+ if (page->index != tindex)
+ goto fail;
+ if (!trylock_page(page))
+ goto fail;
+ if (PageWriteback(page))
+ goto fail_unlock_page;
+ if (page->mapping != inode->i_mapping)
+ goto fail_unlock_page;
+ if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
+ goto fail_unlock_page;
+
+ /*
+ * page_dirty is initially a count of buffers on the page before
+ * EOF and is decremented as we move each into a cleanable state.
+ *
+ * Derivation:
+ *
+ * End offset is the highest offset that this page should represent.
+ * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+ * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+ * hence give us the correct page_dirty count. On any other page,
+ * it will be zero and in that case we need page_dirty to be the
+ * count of buffers on the page.
+ */
+ end_offset = min_t(unsigned long long,
+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+ i_size_read(inode));
+
+ /*
+ * If the current map does not span the entire page we are about to try
+ * to write, then give up. The only way we can write a page that spans
+ * multiple mappings in a single writeback iteration is via the
+ * xfs_vm_writepage() function. Data integrity writeback requires the
+ * entire page to be written in a single attempt, otherwise the part of
+ * the page we don't write here doesn't get written as part of the data
+ * integrity sync.
+ *
+ * For normal writeback, we also don't attempt to write partial pages
+ * here as it simply means that write_cache_pages() will see it under
+ * writeback and ignore the page until some point in the future, at
+ * which time this will be the only page in the file that needs
+ * writeback. Hence for more optimal IO patterns, we should always
+ * avoid partial page writeback due to multiple mappings on a page here.
+ */
+ if (!xfs_imap_valid(inode, imap, end_offset))
+ goto fail_unlock_page;
+
+ /* len is the block size in bytes; page_dirty counts blocks before EOF. */
+ len = 1 << inode->i_blkbits;
+ p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+ PAGE_CACHE_SIZE);
+ p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+ page_dirty = p_offset / len;
+
+ /*
+ * The moment we find a buffer that doesn't match our current type
+ * specification or can't be written, abort the loop and start
+ * writeback. As per the above xfs_imap_valid() check, only
+ * xfs_vm_writepage() can handle partial page writeback fully - we are
+ * limited here to the buffers that are contiguous with the current
+ * ioend, and hence a buffer we can't write breaks that contiguity and
+ * we have to defer the rest of the IO to xfs_vm_writepage().
+ */
+ bh = head = page_buffers(page);
+ do {
+ if (offset >= end_offset)
+ break;
+ if (!buffer_uptodate(bh))
+ uptodate = 0;
+ if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+ done = 1;
+ break;
+ }
+
+ if (buffer_unwritten(bh) || buffer_delay(bh) ||
+ buffer_mapped(bh)) {
+ if (buffer_unwritten(bh))
+ type = XFS_IO_UNWRITTEN;
+ else if (buffer_delay(bh))
+ type = XFS_IO_DELALLOC;
+ else
+ type = XFS_IO_OVERWRITE;
+
+ /*
+ * imap should always be valid because of the above
+ * partial page end_offset check on the imap.
+ */
+ ASSERT(xfs_imap_valid(inode, imap, offset));
+
+ lock_buffer(bh);
+ if (type != XFS_IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, imap, offset);
+ /*
+ * "done" is still 0 here: it is only ever set right
+ * before breaking out of this loop, so a new ioend is
+ * started only when the buffer type changes.
+ */
+ xfs_add_to_ioend(inode, bh, offset, type,
+ ioendp, done);
+
+ page_dirty--;
+ count++;
+ } else {
+ done = 1;
+ break;
+ }
+ } while (offset += len, (bh = bh->b_this_page) != head);
+
+ /* Only if the walk covered every buffer and all were uptodate. */
+ if (uptodate && bh == head)
+ SetPageUptodate(page);
+
+ /*
+ * A page with buffers queued for I/O uses up writeback budget; stop
+ * once the budget is exhausted unless this is data integrity writeback.
+ */
+ if (count) {
+ if (--wbc->nr_to_write <= 0 &&
+ wbc->sync_mode == WB_SYNC_NONE)
+ done = 1;
+ }
+ xfs_start_page_writeback(page, !page_dirty, count);
+
+ return done;
+ fail_unlock_page:
+ unlock_page(page);
+ fail:
+ return 1;
+}
+
+/*
+ * Convert & write out a cluster of pages in the same extent as defined
+ * by @imap, starting at page index @tindex and going no further than
+ * @tlast.
+ *
+ * Pages are looked up in batches and handed to xfs_convert_page() one at a
+ * time; processing stops as soon as that function reports it is done, a
+ * lookup finds no pages, or @tlast has been passed.
+ */
+STATIC void
+xfs_cluster_write(
+	struct inode		*inode,
+	pgoff_t			tindex,
+	struct xfs_bmbt_irec	*imap,
+	xfs_ioend_t		**ioendp,
+	struct writeback_control *wbc,
+	pgoff_t			tlast)
+{
+	struct pagevec		pvec;
+	int			done = 0;
+
+	pagevec_init(&pvec, 0);
+	for (;;) {
+		unsigned	want;
+		int		slot;
+
+		if (done || tindex > tlast)
+			break;
+
+		/* Never ask for more pages than remain up to @tlast. */
+		want = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, want))
+			break;
+
+		slot = 0;
+		while (slot < pagevec_count(&pvec)) {
+			done = xfs_convert_page(inode, pvec.pages[slot],
+					tindex, imap, ioendp, wbc);
+			/* The expected index advances even on the last call. */
+			tindex++;
+			if (done)
+				break;
+			slot++;
+		}
+
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+}
+
+/*
+ * Invalidate @length bytes of @page starting at @offset: emit the
+ * xfs_invalidatepage trace event and defer to block_invalidatepage().
+ */
+STATIC void
+xfs_vm_invalidatepage(
+	struct page		*page,
+	unsigned int		offset,
+	unsigned int		length)
+{
+	struct inode		*host = page->mapping->host;
+
+	trace_xfs_invalidatepage(host, page, offset, length);
+	block_invalidatepage(page, offset, length);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see a ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+	struct page		*page)
+{
+	struct inode		*inode = page->mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			offset = page_offset(page);
+	struct buffer_head	*first, *cur;
+
+	/*
+	 * Punching is only attempted when the page carries at least one
+	 * delalloc buffer and the filesystem has not been shut down; the
+	 * page is invalidated in every case.
+	 */
+	if (xfs_check_page_type(page, XFS_IO_DELALLOC, true) &&
+	    !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_alert(ip->i_mount,
+			"page discard on page %p, inode 0x%llx, offset %llu.",
+			page, ip->i_ino, offset);
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		first = page_buffers(page);
+		cur = first;
+		do {
+			if (buffer_delay(cur)) {
+				xfs_fileoff_t	start_fsb;
+				int		error;
+
+				/* Punch out the single block under this buffer. */
+				start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+				error = xfs_bmap_punch_delalloc_range(ip,
+						start_fsb, 1);
+				if (error) {
+					/* something screwed, just bail */
+					if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+						xfs_alert(ip->i_mount,
+			"page discard unable to remove delalloc mapping.");
+					}
+					break;
+				}
+			}
+
+			offset += 1 << inode->i_blkbits;
+			cur = cur->b_this_page;
+		} while (cur != first);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	}
+
+	xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+}
+
+/*
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For any other dirty buffer heads on the page we should flush them.
+ *
+ * Returns 0 when the page was submitted, redirtied or had nothing to write,
+ * or the error from xfs_map_blocks() when mapping failed with anything other
+ * than -EAGAIN (which redirties the page instead).
+ */
+STATIC int
+xfs_vm_writepage(
+ struct page *page,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = page->mapping->host;
+ struct buffer_head *bh, *head;
+ struct xfs_bmbt_irec imap;
+ xfs_ioend_t *ioend = NULL, *iohead = NULL;
+ loff_t offset;
+ unsigned int type;
+ __uint64_t end_offset;
+ pgoff_t end_index, last_index;
+ ssize_t len;
+ int err, imap_valid = 0, uptodate = 1;
+ int count = 0;
+ int nonblocking = 0;
+
+ trace_xfs_writepage(inode, page, 0, 0);
+
+ ASSERT(page_has_buffers(page));
+
+ /*
+ * Refuse to write the page out if we are called from reclaim context.
+ *
+ * This avoids stack overflows when called from deeply used stacks in
+ * random callers for direct reclaim or memcg reclaim. We explicitly
+ * allow reclaim from kswapd as the stack usage there is relatively low.
+ *
+ * This should never happen except in the case of a VM regression so
+ * warn about it.
+ */
+ if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
+ PF_MEMALLOC))
+ goto redirty;
+
+ /*
+ * Given that we do not allow direct reclaim to call us, we should
+ * never be called while in a filesystem transaction.
+ */
+ if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
+ goto redirty;
+
+ /* Is this page beyond the end of the file? */
+ offset = i_size_read(inode);
+ end_index = offset >> PAGE_CACHE_SHIFT;
+ last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
+
+ /*
+ * The page index is less than the end_index, adjust the end_offset
+ * to the highest offset that this page should represent.
+ * -----------------------------------------------------
+ * | file mapping | <EOF> |
+ * -----------------------------------------------------
+ * | Page ... | Page N-2 | Page N-1 | Page N | |
+ * ^--------------------------------^----------|--------
+ * | desired writeback range | see else |
+ * ---------------------------------^------------------|
+ */
+ if (page->index < end_index)
+ end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+ else {
+ /*
+ * Check whether the page to write out is beyond or straddles
+ * i_size or not.
+ * -------------------------------------------------------
+ * | file mapping | <EOF> |
+ * -------------------------------------------------------
+ * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
+ * ^--------------------------------^-----------|---------
+ * | | Straddles |
+ * ---------------------------------^-----------|--------|
+ */
+ unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
+
+ /*
+ * Skip the page if it is fully outside i_size, e.g. due to a
+ * truncate operation that is in progress. We must redirty the
+ * page so that reclaim stops reclaiming it. Otherwise
+ * xfs_vm_releasepage() is called on it and gets confused.
+ *
+ * Note that the end_index is unsigned long, it would overflow
+ * if the given offset is greater than 16TB on 32-bit system
+ * and if we do check the page is fully outside i_size or not
+ * via "if (page->index >= end_index + 1)" as "end_index + 1"
+ * will be evaluated to 0. Hence this page will be redirtied
+ * and be written out repeatedly which would result in an
+ * infinite loop, the user program that perform this operation
+ * will hang. Instead, we can verify this situation by checking
+ * if the page to write is totally beyond the i_size or if it's
+ * offset is just equal to the EOF.
+ */
+ if (page->index > end_index ||
+ (page->index == end_index && offset_into_page == 0))
+ goto redirty;
+
+ /*
+ * The page straddles i_size. It must be zeroed out on each
+ * and every writepage invocation because it may be mmapped.
+ * "A file is mapped in multiples of the page size. For a file
+ * that is not a multiple of the page size, the remaining
+ * memory is zeroed when mapped, and writes to that region are
+ * not written out to the file."
+ */
+ zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+
+ /* Adjust the end_offset to the end of file */
+ end_offset = offset;
+ }
+
+ /* len is the block size in bytes. */
+ len = 1 << inode->i_blkbits;
+
+ /* From here on "offset" tracks the file offset of the current buffer. */
+ bh = head = page_buffers(page);
+ offset = page_offset(page);
+ type = XFS_IO_OVERWRITE;
+
+ if (wbc->sync_mode == WB_SYNC_NONE)
+ nonblocking = 1;
+
+ do {
+ int new_ioend = 0;
+
+ if (offset >= end_offset)
+ break;
+ if (!buffer_uptodate(bh))
+ uptodate = 0;
+
+ /*
+ * set_page_dirty dirties all buffers in a page, independent
+ * of their state. The dirty state however is entirely
+ * meaningless for holes (!mapped && uptodate), so skip
+ * buffers covering holes here.
+ */
+ if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+ imap_valid = 0;
+ continue;
+ }
+
+ /*
+ * Classify the buffer. A change of type invalidates the cached
+ * mapping so that a fresh mapping and a new ioend are used.
+ */
+ if (buffer_unwritten(bh)) {
+ if (type != XFS_IO_UNWRITTEN) {
+ type = XFS_IO_UNWRITTEN;
+ imap_valid = 0;
+ }
+ } else if (buffer_delay(bh)) {
+ if (type != XFS_IO_DELALLOC) {
+ type = XFS_IO_DELALLOC;
+ imap_valid = 0;
+ }
+ } else if (buffer_uptodate(bh)) {
+ if (type != XFS_IO_OVERWRITE) {
+ type = XFS_IO_OVERWRITE;
+ imap_valid = 0;
+ }
+ } else {
+ if (PageUptodate(page))
+ ASSERT(buffer_mapped(bh));
+ /*
+ * This buffer is not uptodate and will not be
+ * written to disk. Ensure that we will put any
+ * subsequent writeable buffers into a new
+ * ioend.
+ */
+ imap_valid = 0;
+ continue;
+ }
+
+ if (imap_valid)
+ imap_valid = xfs_imap_valid(inode, &imap, offset);
+ if (!imap_valid) {
+ /*
+ * If we didn't have a valid mapping then we need to
+ * put the new mapping into a separate ioend structure.
+ * This ensures non-contiguous extents always have
+ * separate ioends, which is particularly important
+ * for unwritten extent conversion at I/O completion
+ * time.
+ */
+ new_ioend = 1;
+ err = xfs_map_blocks(inode, offset, &imap, type,
+ nonblocking);
+ if (err)
+ goto error;
+ imap_valid = xfs_imap_valid(inode, &imap, offset);
+ }
+ if (imap_valid) {
+ lock_buffer(bh);
+ if (type != XFS_IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, &imap, offset);
+ xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+ new_ioend);
+ count++;
+ }
+
+ /* Remember the first ioend: it is the head of the chain. */
+ if (!iohead)
+ iohead = ioend;
+
+ } while (offset += len, ((bh = bh->b_this_page) != head));
+
+ /* Only if the walk covered every buffer and all were uptodate. */
+ if (uptodate && bh == head)
+ SetPageUptodate(page);
+
+ xfs_start_page_writeback(page, 1, count);
+
+ /* if there is no IO to be submitted for this page, we are done */
+ if (!ioend)
+ return 0;
+
+ ASSERT(iohead);
+
+ /*
+ * Any errors from this point onwards need to be reported through the
+ * IO completion path as we have marked the initial page as under
+ * writeback and unlocked it.
+ */
+ if (imap_valid) {
+ /*
+ * NOTE: this declaration shadows the function-scope pgoff_t
+ * end_index; here it first holds a block count, then a byte
+ * offset and finally a page index.
+ */
+ xfs_off_t end_index;
+
+ end_index = imap.br_startoff + imap.br_blockcount;
+
+ /* to bytes */
+ end_index <<= inode->i_blkbits;
+
+ /* to pages */
+ end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+ /* check against file size */
+ if (end_index > last_index)
+ end_index = last_index;
+
+ xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+ wbc, end_index);
+ }
+
+
+ /*
+ * Reserve log space if we might write beyond the on-disk inode size.
+ */
+ err = 0;
+ if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+ err = xfs_setfilesize_trans_alloc(ioend);
+
+ /* A non-zero err makes xfs_submit_ioend() fail the whole chain. */
+ xfs_submit_ioend(wbc, iohead, err);
+
+ return 0;
+
+error:
+ /* Undo the buffers queued so far before handling the error. */
+ if (iohead)
+ xfs_cancel_ioend(iohead);
+
+ if (err == -EAGAIN)
+ goto redirty;
+
+ xfs_aops_discard_page(page);
+ ClearPageUptodate(page);
+ unlock_page(page);
+ return err;
+
+redirty:
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+}
+
+/*
+ * Write back dirty pages of @mapping: clear the XFS_ITRUNCATED flag on the
+ * owning inode and then let generic_writepages() do the work, returning its
+ * result.
+ */
+STATIC int
+xfs_vm_writepages(
+	struct address_space	*mapping,
+	struct writeback_control *wbc)
+{
+	struct xfs_inode	*ip = XFS_I(mapping->host);
+
+	xfs_iflags_clear(ip, XFS_ITRUNCATED);
+	return generic_writepages(mapping, wbc);
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. The page should already be clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 1 if the page is ok to release, 0 otherwise.
+ */
+STATIC int
+xfs_vm_releasepage(
+	struct page		*page,
+	gfp_t			gfp_mask)
+{
+	struct inode		*host = page->mapping->host;
+	int			delalloc, unwritten;
+
+	trace_xfs_releasepage(host, page, 0, 0);
+
+	xfs_count_page_state(page, &delalloc, &unwritten);
+
+	/*
+	 * A page that still has delalloc or unwritten buffers is not
+	 * expected here; warn once and refuse to release it.
+	 */
+	if (WARN_ON_ONCE(delalloc) || WARN_ON_ONCE(unwritten))
+		return 0;
+
+	return try_to_free_buffers(page);
+}
+
+/*
+ * Map file block @iblock of @inode into @bh_result.
+ *
+ * @create: non-zero if missing blocks may be allocated (or reserved as
+ * delalloc) to satisfy the mapping.
+ * @direct: non-zero when called on behalf of direct I/O.
+ *
+ * On entry @bh_result->b_size holds the size of the range the caller would
+ * like mapped; it is rewritten with the size actually mapped for direct I/O
+ * or when more than one block was requested.
+ *
+ * Returns 0 on success or a negative error number.
+ */
+STATIC int
+__xfs_get_blocks(
+ struct inode *inode,
+ sector_t iblock,
+ struct buffer_head *bh_result,
+ int create,
+ int direct)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t offset_fsb, end_fsb;
+ int error = 0;
+ int lockmode = 0;
+ struct xfs_bmbt_irec imap;
+ int nimaps = 1;
+ xfs_off_t offset;
+ ssize_t size;
+ int new = 0;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -XFS_ERROR(EIO);
+
+ offset = (xfs_off_t)iblock << inode->i_blkbits;
+ ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+ size = bh_result->b_size;
+
+ /* Direct I/O reads starting at or beyond EOF get no mapping. */
+ if (!create && direct && offset >= i_size_read(inode))
+ return 0;
+
+ /*
+ * Direct I/O is usually done on preallocated files, so try getting
+ * a block mapping without an exclusive lock first. For buffered
+ * writes we already have the exclusive iolock anyway, so avoiding
+ * a lock roundtrip here by taking the ilock exclusive from the
+ * beginning is a useful micro optimization.
+ */
+ if (create && !direct) {
+ lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lockmode);
+ } else {
+ lockmode = xfs_ilock_data_map_shared(ip);
+ }
+
+ /* Clamp the request so it does not extend past s_maxbytes. */
+ ASSERT(offset <= mp->m_super->s_maxbytes);
+ if (offset + size > mp->m_super->s_maxbytes)
+ size = mp->m_super->s_maxbytes - offset;
+ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+ &imap, &nimaps, XFS_BMAPI_ENTIRE);
+ if (error)
+ goto out_unlock;
+
+ /*
+ * Every branch below either drops the ilock itself or jumps to
+ * out_unlock, so the lock is not held past this if/else chain.
+ */
+ if (create &&
+ (!nimaps ||
+ (imap.br_startblock == HOLESTARTBLOCK ||
+ imap.br_startblock == DELAYSTARTBLOCK))) {
+ if (direct || xfs_get_extsz_hint(ip)) {
+ /*
+ * Drop the ilock in preparation for starting the block
+ * allocation transaction. It will be retaken
+ * exclusively inside xfs_iomap_write_direct for the
+ * actual allocation.
+ */
+ xfs_iunlock(ip, lockmode);
+ error = xfs_iomap_write_direct(ip, offset, size,
+ &imap, nimaps);
+ /* The ilock was already dropped above: return directly. */
+ if (error)
+ return -error;
+ new = 1;
+ } else {
+ /*
+ * Delalloc reservations do not require a transaction,
+ * we can go on without dropping the lock here. If we
+ * are allocating a new delalloc block, make sure that
+ * we set the new flag so that we mark the buffer new so
+ * that we know that it is newly allocated if the write
+ * fails.
+ */
+ if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
+ new = 1;
+ error = xfs_iomap_write_delay(ip, offset, size, &imap);
+ if (error)
+ goto out_unlock;
+
+ xfs_iunlock(ip, lockmode);
+ }
+
+ trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+ } else if (nimaps) {
+ trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+ xfs_iunlock(ip, lockmode);
+ } else {
+ /* No mapping found: error is still 0, so this returns 0. */
+ trace_xfs_get_blocks_notfound(ip, offset, size);
+ goto out_unlock;
+ }
+
+ if (imap.br_startblock != HOLESTARTBLOCK &&
+ imap.br_startblock != DELAYSTARTBLOCK) {
+ /*
+ * For unwritten extents do not report a disk address on
+ * the read case (treat as if we're reading into a hole).
+ */
+ if (create || !ISUNWRITTEN(&imap))
+ xfs_map_buffer(inode, bh_result, &imap, offset);
+ if (create && ISUNWRITTEN(&imap)) {
+ if (direct) {
+ bh_result->b_private = inode;
+ set_buffer_defer_completion(bh_result);
+ }
+ set_buffer_unwritten(bh_result);
+ }
+ }
+
+ /*
+ * If this is a realtime file, data may be on a different device
+ * to the one currently pointed to by the buffer_head's b_bdev.
+ */
+ bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
+
+ /*
+ * If we previously allocated a block out beyond eof and we are now
+ * coming back to use it then we will need to flag it as new even if it
+ * has a disk address.
+ *
+ * With sub-block writes into unwritten extents we also need to mark
+ * the buffer as new so that the unwritten parts of the buffer gets
+ * correctly zeroed.
+ */
+ if (create &&
+ ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+ (offset >= i_size_read(inode)) ||
+ (new || ISUNWRITTEN(&imap))))
+ set_buffer_new(bh_result);
+
+ if (imap.br_startblock == DELAYSTARTBLOCK) {
+ BUG_ON(direct);
+ if (create) {
+ set_buffer_uptodate(bh_result);
+ set_buffer_mapped(bh_result);
+ set_buffer_delay(bh_result);
+ }
+ }
+
+ /*
+ * If this is O_DIRECT or the mpage code calling tell them how large
+ * the mapping is, so that we can avoid repeated get_blocks calls.
+ *
+ * If the mapping spans EOF, then we have to break the mapping up as the
+ * mapping for blocks beyond EOF must be marked new so that sub block
+ * regions can be correctly zeroed. We can't do this for mappings within
+ * EOF unless the mapping was just allocated or is unwritten, otherwise
+ * the callers would overwrite existing data with zeros. Hence we have
+ * to split the mapping into a range up to and including EOF, and a
+ * second mapping for beyond EOF.
+ */
+ if (direct || size > (1 << inode->i_blkbits)) {
+ xfs_off_t mapping_size;
+
+ /* Blocks from @iblock to the end of the extent, then bytes. */
+ mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+ mapping_size <<= inode->i_blkbits;
+
+ ASSERT(mapping_size > 0);
+ if (mapping_size > size)
+ mapping_size = size;
+ if (offset < i_size_read(inode) &&
+ offset + mapping_size >= i_size_read(inode)) {
+ /* limit mapping to block that spans EOF */
+ mapping_size = roundup_64(i_size_read(inode) - offset,
+ 1 << inode->i_blkbits);
+ }
+ if (mapping_size > LONG_MAX)
+ mapping_size = LONG_MAX;
+
+ bh_result->b_size = mapping_size;
+ }
+
+ return 0;
+
+out_unlock:
+ xfs_iunlock(ip, lockmode);
+ return -error;
+}
+
+/*
+ * Block mapping callback that invokes __xfs_get_blocks() with its "direct"
+ * argument cleared. Returns whatever __xfs_get_blocks() returns.
+ */
+int
+xfs_get_blocks(
+	struct inode		*inode,
+	sector_t		iblock,
+	struct buffer_head	*bh_result,
+	int			create)
+{
+	const int		direct = 0;
+
+	return __xfs_get_blocks(inode, iblock, bh_result, create, direct);
+}
+
+/*
+ * Block mapping callback for direct I/O: invokes __xfs_get_blocks() with its
+ * "direct" argument set. Returns whatever __xfs_get_blocks() returns.
+ */
+STATIC int
+xfs_get_blocks_direct(
+	struct inode		*inode,
+	sector_t		iblock,
+	struct buffer_head	*bh_result,
+	int			create)
+{
+	const int		direct = 1;
+
+	return __xfs_get_blocks(inode, iblock, bh_result, create, direct);
+}
+
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents. In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done. But in case this was a successful AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions. In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
+STATIC void
+xfs_end_io_direct_write(
+	struct kiocb		*iocb,
+	loff_t			offset,
+	ssize_t			size,
+	void			*private)
+{
+	struct xfs_ioend	*ioend = iocb->private;
+	struct inode		*inode = ioend->io_inode;
+	loff_t			end = offset + size;
+
+	/*
+	 * The generic direct I/O code only updates the inode size after this
+	 * handler has run, which would leave the on-disk size looking larger
+	 * than the in-core size. Bring the in-core size forward here first.
+	 */
+	if (end > i_size_read(inode))
+		i_size_write(inode, end);
+
+	/*
+	 * blockdev_direct_IO can still return an error after this handler
+	 * was called; clearing the pointer guards against a double free of
+	 * the ioend.
+	 */
+	iocb->private = NULL;
+
+	ioend->io_offset = offset;
+	ioend->io_size = size;
+
+	/* A non-NULL @private asks for unwritten extent conversion. */
+	if (private && size > 0)
+		ioend->io_type = XFS_IO_UNWRITTEN;
+
+	xfs_finish_ioend_sync(ioend);
+}
+
+/*
+ * Direct I/O entry point: hand the request described by @iocb, @iter and
+ * @offset to __blockdev_direct_IO() and return its result.
+ *
+ * Writes additionally get an ioend attached to @iocb->private and use
+ * xfs_end_io_direct_write() as the completion handler.
+ */
+STATIC ssize_t
+xfs_vm_direct_IO(
+	int			rw,
+	struct kiocb		*iocb,
+	struct iov_iter		*iter,
+	loff_t			offset)
+{
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
+	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
+	struct xfs_ioend	*ioend;
+	size_t			size;
+	ssize_t			ret;
+
+	/* Reads need neither an ioend nor a completion handler. */
+	if (!(rw & WRITE))
+		return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+					    offset, xfs_get_blocks_direct,
+					    NULL, NULL, 0);
+
+	size = iov_iter_count(iter);
+
+	/*
+	 * We cannot preallocate a size update transaction here as we
+	 * don't know whether allocation is necessary or not. Hence we
+	 * can only tell IO completion that one is necessary if we are
+	 * not doing unwritten extent conversion.
+	 */
+	ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
+	iocb->private = ioend;
+	if (offset + size > XFS_I(inode)->i_d.di_size)
+		ioend->io_isdirect = 1;
+
+	ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+				   offset, xfs_get_blocks_direct,
+				   xfs_end_io_direct_write, NULL,
+				   DIO_ASYNC_EXTEND);
+
+	/*
+	 * The completion handler clears iocb->private; if it is still set
+	 * and the I/O was not queued, the ioend is ours to destroy.
+	 */
+	if (ret != -EIOCBQUEUED && iocb->private)
+		xfs_destroy_ioend(ioend);
+
+	return ret;
+}
+
+/*
+ * Punch out the delalloc blocks we have already allocated.
+ *
+ * Don't bother with xfs_setattr given that nothing can have made it to disk yet
+ * as the page is still locked at this point.
+ */
+STATIC void
+xfs_vm_kill_delalloc_range(
+	struct inode		*inode,
+	loff_t			start,
+	loff_t			end)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	xfs_fileoff_t		first_fsb = XFS_B_TO_FSB(ip->i_mount, start);
+	xfs_fileoff_t		limit_fsb = XFS_B_TO_FSB(ip->i_mount, end);
+	int			error;
+
+	/* Nothing to punch if the converted range is empty. */
+	if (limit_fsb <= first_fsb)
+		return;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_bmap_punch_delalloc_range(ip, first_fsb,
+					      limit_fsb - first_fsb);
+
+	/* something screwed, just bail - but stay quiet after a shutdown */
+	if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_alert(ip->i_mount,
+			"xfs_vm_write_failed: unable to clean up ino %lld",
+			ip->i_ino);
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+}
+
+/*
+ * Clean up the buffers of @page after a failed write of @len bytes at file
+ * position @pos.
+ *
+ * Every buffer overlapping the write range that is delayed-allocation and is
+ * either new or starts at or beyond EOF has its delalloc blocks punched out
+ * and its delay, uptodate, mapped, new and dirty state cleared.
+ */
+STATIC void
+xfs_vm_write_failed(
+ struct inode *inode,
+ struct page *page,
+ loff_t pos,
+ unsigned len)
+{
+ loff_t block_offset;
+ loff_t block_start;
+ loff_t block_end;
+ /* [from, to) is the write range relative to the start of the page. */
+ loff_t from = pos & (PAGE_CACHE_SIZE - 1);
+ loff_t to = from + len;
+ struct buffer_head *bh, *head;
+
+ /*
+ * The request pos offset might be 32 or 64 bit, this is all fine
+ * on 64-bit platform. However, for 64-bit pos request on 32-bit
+ * platform, the high 32-bit will be masked off if we evaluate the
+ * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
+ * 0xfffff000 as an unsigned long, hence the result is incorrect
+ * which could cause the following ASSERT failed in most cases.
+ * In order to avoid this mismatch problem, we evaluate the
+ * block_offset of the start of the page by using shifts rather
+ * than masks.
+ */
+ block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
+
+ ASSERT(block_offset + from == pos);
+
+ head = page_buffers(page);
+ block_start = 0;
+ /*
+ * block_start/block_end are page-relative, block_offset is the file
+ * offset of the current buffer. The "!block_start" term lets the walk
+ * start at head, where "bh != head" is false. Note that the increment
+ * clause advances bh first, so block_offset is bumped by the size of
+ * the buffer just stepped to, not the one just processed.
+ */
+ for (bh = head; bh != head || !block_start;
+ bh = bh->b_this_page, block_start = block_end,
+ block_offset += bh->b_size) {
+ block_end = block_start + bh->b_size;
+
+ /* skip buffers before the write */
+ if (block_end <= from)
+ continue;
+
+ /* if the buffer is after the write, we're done */
+ if (block_start >= to)
+ break;
+
+ if (!buffer_delay(bh))
+ continue;
+
+ /* Leave alone delalloc buffers inside EOF that are not new. */
+ if (!buffer_new(bh) && block_offset < i_size_read(inode))
+ continue;
+
+ xfs_vm_kill_delalloc_range(inode, block_offset,
+ block_offset + bh->b_size);
+
+ /*
+ * This buffer does not contain data anymore. make sure anyone
+ * who finds it knows that for certain.
+ */
+ clear_buffer_delay(bh);
+ clear_buffer_uptodate(bh);
+ clear_buffer_mapped(bh);
+ clear_buffer_new(bh);
+ clear_buffer_dirty(bh);
+ }
+
+}
+
+/*
+ * Prepare the page cache page covering @pos for a buffered write of @len
+ * bytes.
+ *
+ * This used to call block_write_begin(), but it unlocks and releases the page
+ * on error, and we need that page to be able to punch stale delalloc blocks out
+ * on failure. Hence the logic is open-coded here and xfs_vm_write_failed() is
+ * called at the appropriate point.
+ *
+ * On success the locked page is returned in *@pagep and 0 is returned. On
+ * failure *@pagep is set to NULL and a negative error is returned: -ENOMEM if
+ * no page could be obtained, otherwise the status from __block_write_begin().
+ */
+STATIC int
+xfs_vm_write_begin(
+	struct file		*file,
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		flags,
+	struct page		**pagep,
+	void			**fsdata)
+{
+	pgoff_t			index = pos >> PAGE_CACHE_SHIFT;
+	struct page		*page;
+	int			status;
+
+	ASSERT(len <= PAGE_CACHE_SIZE);
+
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+
+	status = __block_write_begin(page, pos, len, xfs_get_blocks);
+	if (unlikely(status)) {
+		struct inode	*inode = mapping->host;
+		/*
+		 * File sizes and offsets are loff_t. Holding them in
+		 * size_t/ssize_t truncates them on 32-bit builds once the
+		 * file grows past 4GB, which would corrupt both the EOF test
+		 * and the start of the range truncated below.
+		 */
+		loff_t		isize = i_size_read(inode);
+
+		xfs_vm_write_failed(inode, page, pos, len);
+		unlock_page(page);
+
+		/*
+		 * If the write is beyond EOF, we only want to kill blocks
+		 * allocated in this write, not blocks that were previously
+		 * written successfully.
+		 */
+		if (pos + len > isize) {
+			loff_t	start = max_t(loff_t, pos, isize);
+
+			truncate_pagecache_range(inode, start, pos + len);
+		}
+
+		page_cache_release(page);
+		page = NULL;
+	}
+
+	*pagep = page;
+	return status;
+}
+
+/*
+ * Complete a buffered write to @page via generic_write_end() and clean up
+ * after a short copy.
+ *
+ * On failure, we only need to kill delalloc blocks beyond EOF in the range of
+ * this specific write because they will never be written. Previous writes
+ * beyond EOF where block allocation succeeded do not need to be trashed, so
+ * only new blocks from this write should be trashed. For blocks within
+ * EOF, generic_write_end() zeros them so they are safe to leave alone and be
+ * written with all the other valid data.
+ *
+ * Returns whatever generic_write_end() returned.
+ */
+STATIC int
+xfs_vm_write_end(
+	struct file		*file,
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		copied,
+	struct page		*page,
+	void			*fsdata)
+{
+	int			ret;
+
+	ASSERT(len <= PAGE_CACHE_SIZE);
+
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	/*
+	 * NOTE(review): ret is int and len is unsigned, so this comparison is
+	 * done in unsigned arithmetic and a negative ret would not take the
+	 * branch. Presumably generic_write_end() never returns a negative
+	 * value here - confirm.
+	 */
+	if (unlikely(ret < len)) {
+		/*
+		 * Keep the file size in loff_t: a size_t would truncate it
+		 * where size_t is narrower than loff_t and make the range
+		 * passed to the two calls below wrong.
+		 */
+		struct inode	*inode = mapping->host;
+		loff_t		isize = i_size_read(inode);
+		loff_t		to = pos + len;
+
+		if (to > isize) {
+			/* only kill blocks in this write beyond EOF */
+			if (pos > isize)
+				isize = pos;
+			xfs_vm_kill_delalloc_range(inode, isize, to);
+			truncate_pagecache_range(inode, isize, to);
+		}
+	}
+	return ret;
+}
+
+/*
+ * ->bmap implementation: write back and wait on the mapping's page cache
+ * while holding the inode's iolock shared, then let generic_block_bmap()
+ * do the lookup through xfs_get_blocks.
+ */
+STATIC sector_t
+xfs_vm_bmap(
+	struct address_space	*mapping,
+	sector_t		block)
+{
+	struct xfs_inode	*ip = XFS_I(mapping->host);
+
+	trace_xfs_vm_bmap(ip);
+
+	/* flush the page cache under the shared iolock before mapping */
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	filemap_write_and_wait(mapping);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	return generic_block_bmap(mapping, block, xfs_get_blocks);
+}
+
+/*
+ * ->readpage implementation: hand the page to mpage_readpage() with
+ * xfs_get_blocks as the block mapping callback. The file argument is not
+ * used. Returns whatever mpage_readpage() returns.
+ */
+STATIC int
+xfs_vm_readpage(
+ struct file *unused,
+ struct page *page)
+{
+ return mpage_readpage(page, xfs_get_blocks);
+}
+
+/*
+ * ->readpages implementation: pass the list of nr_pages pages for this
+ * mapping to mpage_readpages() with xfs_get_blocks as the block mapping
+ * callback. The file argument is not used. Returns whatever
+ * mpage_readpages() returns.
+ */
+STATIC int
+xfs_vm_readpages(
+ struct file *unused,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned nr_pages)
+{
+ return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+}
+
+/*
+ * Address space operations table for XFS. Most entries point at the
+ * xfs_vm_* implementations in this file; page migration, the
+ * partially-uptodate check and error page removal use the generic
+ * buffer/block helpers.
+ */
+const struct address_space_operations xfs_address_space_operations = {
+ .readpage = xfs_vm_readpage,
+ .readpages = xfs_vm_readpages,
+ .writepage = xfs_vm_writepage,
+ .writepages = xfs_vm_writepages,
+ .releasepage = xfs_vm_releasepage,
+ .invalidatepage = xfs_vm_invalidatepage,
+ .write_begin = xfs_vm_write_begin,
+ .write_end = xfs_vm_write_end,
+ .bmap = xfs_vm_bmap,
+ .direct_IO = xfs_vm_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
+};
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/xfs_aops.h