path: root/drivers/input/serio/xilinx_ps2.c
blob: ebb22f88c8426bdd3cebff9e2f822d0b7412ade7 (plain)
/*
 * Xilinx XPS PS/2 device driver
 *
 * (c) 2005 MontaVista Software, Inc.
 * (c) 2008 Xilinx, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>
#include <linux/serio.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/io.h>

#include <linux/of_device.h>
#include <linux/of_platform.h>

#define DRIVER_NAME		"xilinx_ps2"

/* Register offsets for the xps2 device */
#define XPS2_SRST_OFFSET	0x00000000 /* Software Reset register */
#define XPS2_STATUS_OFFSET	0x00000004 /* Status register */
#define XPS2_RX_DATA_OFFSET	0x00000008 /* Receive Data register */
#define XPS2_TX_DATA_OFFSET	0x0000000C /* Transmit Data register */
#define XPS2_GIER_OFFSET	0x0000002C /* Global Interrupt Enable reg */
#define XPS2_IPISR_OFFSET	0x00000030 /* Interrupt Status register */
#define XPS2_IPIER_OFFSET	0x00000038 /* Interrupt Enable register */

/* Reset Register Bit Definitions */
#define XPS2_SRST_RESET		0x0000000A /* Software Reset  */

/* Status Register Bit Positions */
#define XPS2_STATUS_RX_FULL	0x00000001 /* Receive Full  */
#define XPS2_STATUS_TX_FULL	0x00000002 /* Transmit Full  */

/* Bit definitions for the ISR/IER registers. Both registers share the same
 * bit layout, so the bits are defined only once. */
#define XPS2_IPIXR_WDT_TOUT	0x00000001 /* Watchdog Timeout Interrupt */
#define XPS2_IPIXR_TX_NOACK	0x00000002 /* Transmit No ACK Interrupt */
#define XPS2_IPIXR_TX_ACK	0x00000004 /* Transmit ACK (Data) Interrupt */
#define XPS2_IPIXR_RX_OVF	0x00000008 /* Receive Overflow Interrupt */
#define XPS2_IPIXR_RX_ERR	0x00000010 /* Receive Error Interrupt */
#define XPS2_IPIXR_RX_FULL	0x00000020 /* Receive Data Interrupt */

/* Mask for all the Transmit Interrupts */
#define XPS2_IPIXR_TX_ALL	(XPS2_IPIXR_TX_NOACK | XPS2_IPIXR_TX_ACK)

/* Mask for all the Receive Interrupts */
#define XPS2_IPIXR_RX_ALL	(XPS2_IPIXR_RX_OVF | XPS2_IPIXR_RX_ERR |  \
				 XPS2_IPIXR_RX_FULL)

/* Mask for all the Interrupts */
#define XPS2_IPIXR_ALL		(XPS2_IPIXR_TX_ALL | XPS2_IPIXR_RX_ALL |  \
				 XPS2_IPIXR_WDT_TOUT)

/* Global Interrupt Enable mask */
#define XPS2_GIER_GIE_MASK	0x80000000

struct xps2data {
	int irq;
	spinlock_t lock;
	void __iomem *base_address;	/* virt. address of control registers */
	unsigned int flags;
	struct serio serio;		/* serio */
};

/************************************/
/* XPS PS/2 data transmission calls */
/************************************/

/**
 * xps2_recv() - attempts to receive a byte from the PS/2 port.
 * @drvdata:	pointer to ps2 device private data structure
 * @byte:	address where the read data will be copied
 *
 * If there is any data available in the PS/2 receiver, this function reads
 * the data; otherwise it returns an error.
 */
static int xps2_recv(struct xps2data *drvdata, u8 *byte)
{
	u32 sr;
	int status = -1;

	/* If there is data available in the PS/2 receiver, read it */
	sr = in_be32(drvdata->base_address + XPS2_STATUS_OFFSET);
	if (sr & XPS2_STATUS_RX_FULL) {
		*byte = in_be32(drvdata->base_address + XPS2_RX_DATA_OFFSET);
		status = 0;
	}

	return status;
}

/*********************/
/* Interrupt handler */
/*********************/
static irqreturn_t xps2_interrupt(int irq, void *dev_id)
{
	struct xps2data *drvdata = dev_id;
	u32 intr_sr;
	u8 c;
	int status;

	/* Get the PS/2 interrupts and clear them */
	intr_sr = in_be32(drvdata->base_address + XPS2_IPISR_OFFSET);
	out_be32(drvdata->base_address + XPS2_IPISR_OFFSET, intr_sr);

	/* Check which interrupt is active */
	if (intr_sr & XPS2_IPIXR_RX_OVF)
		dev_warn(drvdata->serio.dev.parent, "receive overrun error\n");

	if (intr_sr & XPS2_IPIXR_RX_ERR)
		drvdata->flags |= SERIO_PARITY;

	if (intr_sr & (XPS2_IPIXR_TX_NOACK | XPS2_IPIXR_WDT_TOUT))
		drvdata->flags |= SERIO_TIMEOUT;

	if (intr_sr & XPS2_IPIXR_RX_FULL) {
		status = xps2_recv(drvdata, &c);

		/* Error if a byte was not received */
		if (status) {
			dev_err(drvdata->serio.dev.parent,
				"wrong rcvd byte count (%d)\n", status);
		} else {
			serio_interrupt(&drvdata->serio, c, drvdata->flags);
			drvdata->flags = 0;
		}
	}

	return IRQ_HANDLED;
}

/*******************/
/* serio callbacks */
/*******************/

/**
 * sxps2_write() - sends a byte out through the PS/2 port.
 * @pserio:	pointer to the serio structure of the PS/2 port
 * @c:		data that needs to be written to the PS/2 port
 *
 * This function checks if the PS/2 transmitter is empty and, if so, sends the
 * byte; otherwise it returns an error. Transmission fails only when nothing is
 * connected to the PS/2 port, which is why we do not try to resend the data on
 * failure.
 */
static int sxps2_write(struct serio *pserio, unsigned char c)
{
	struct xps2data *drvdata = pserio->port_data;
	unsigned long flags;
	u32 sr;
	int status = -1;

	spin_lock_irqsave(&drvdata->lock, flags);

	/* If the PS/2 transmitter is empty send a byte of data */
	sr = in_be32(drvdata->base_address + XPS2_STATUS_OFFSET);
	if (!(sr & XPS2_STATUS_TX_FULL)) {
		out_be32(drvdata->base_address + XPS2_TX_DATA_OFFSET, c);
		status = 0;
	}

	spin_unlock_irqrestore(&drvdata->lock, flags);

	return status;
}

/**
 * sxps2_open() - called when a port is opened by the higher layer.
 * @pserio:	pointer to the serio structure of the PS/2 device
 *
 * This function requests irq and enables interrupts for the PS/2 device.
 */
static int sxps2_open(struct serio *pserio)
{
	struct xps2data *drvdata = pserio->port_data;
	int error;
	u8 c;

	error = request_irq(drvdata->irq, &xps2_interrupt, 0,
				DRIVER_NAME, drvdata);
	if (error) {
		dev_err(drvdata->serio.dev.parent,
			"Couldn't allocate interrupt %d\n", drvdata->irq);
		return error;
	}

	/* start reception by enabling the interrupts */
	out_be32(drvdata->base_address + XPS2_GIER_OFFSET, XPS2_GIER_GIE_MASK);
	out_be32(drvdata->base_address + XPS2_IPIER_OFFSET, XPS2_IPIXR_RX_ALL);
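	/* Drain any stale byte left in the receiver; the result is ignored */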
	(void)xps2_recv(drvdata, &c);

	return 0;		/* success */
}

/**
 * sxps2_close() - frees the interrupt.
 * @pserio:	pointer to the serio structure of the PS/2 device
 *
 * This function frees the irq and disables interrupts for the PS/2 device.
 */
static void sxps2_close(struct serio *pserio)
{
	struct xps2data *drvdata = pserio->port_data;

	/* Disable the PS2 interrupts */
	out_be32(drvdata->base_address + XPS2_GIER_OFFSET, 0x00);
	out_be32(drvdata->base_address + XPS2_IPIER_OFFSET, 0x00);
	free_irq(drvdata->irq, drvdata);
}

/**
 * xps2_of_probe - probe method for the PS/2 device.
 * @ofdev:	pointer to OF device structure
 * @match:	pointer to the structure used for matching a device
 *
 * This function probes the PS/2 device in the device tree.
 * It initializes the driver data structure and the hardware.
 * It returns 0 if the driver is bound to the PS/2 device, or a negative
 * value if there is an error.
 */
static int __devinit xps2_of_probe(struct of_device *ofdev,
				   const struct of_device_id *match)
{
	struct resource r_irq; /* Interrupt resources */
	struct resource r_mem; /* IO mem resources */
	struct xps2data *drvdata;
	struct serio *serio;
	struct device *dev = &ofdev->dev;
	resource_size_t remap_size, phys_addr;
	int error;

	dev_info(dev, "Device Tree Probing \'%s\'\n",
			ofdev->node->name);

	/* Get iospace for the device */
	error = of_address_to_resource(ofdev->node, 0, &r_mem);
	if (error) {
		dev_err(dev, "invalid address\n");
		return error;
	}

	/* Get IRQ for the device */
	if (of_irq_to_resource(ofdev->node, 0, &r_irq) == NO_IRQ) {
		dev_err(dev, "no IRQ found\n");
		return -ENODEV;
	}

	drvdata = kzalloc(sizeof(struct xps2data), GFP_KERNEL);
	if (!drvdata) {
		dev_err(dev, "Couldn't allocate device private record\n");
		return -ENOMEM;
	}

	dev_set_drvdata(dev, drvdata);

	spin_lock_init(&drvdata->lock);
	drvdata->irq = r_irq.start;

	phys_addr = r_mem.start;
	remap_size = r_mem.end - r_mem.start + 1;
	if (!request_mem_region(phys_addr, remap_size, DRIVER_NAME)) {
		dev_err(dev, "Couldn't lock memory region at 0x%08llX\n",
			(unsigned long long)phys_addr);
		error = -EBUSY;
		goto failed1;
	}

	/* Fill in configuration data and add them to the list */
	drvdata->base_address = ioremap(phys_addr, remap_size);
	if (drvdata->base_address == NULL) {
		dev_err(dev, "Couldn't ioremap memory at 0x%08llX\n",
			(unsigned long long)phys_addr);
		error = -EFAULT;
		goto failed2;
	}

	/* Disable all the interrupts, just in case */
	out_be32(drvdata->base_address + XPS2_IPIER_OFFSET, 0);

	/* Reset the PS2 device and abort any current transaction, to make sure
	 * we have the PS2 in a good state */
	out_be32(drvdata->base_address + XPS2_SRST_OFFSET, XPS2_SRST_RESET);

	dev_info(dev, "Xilinx PS2 at 0x%08llX mapped to 0x%p, irq=%d\n",
		 (unsigned long long)phys_addr, drvdata->base_address,
		 drvdata->irq);

	serio = &drvdata->serio;
	serio->id.type = SERIO_8042;
	serio->write = sxps2_write;
	serio->open = sxps2_open;
	serio->close = sxps2_close;
	serio->port_data = drvdata;
	serio->dev.parent = dev;
	snprintf(serio->name, sizeof(serio->name),
		 "Xilinx XPS PS/2 at %08llX", (unsigned long long)phys_addr);
	snprintf(serio->phys, sizeof(serio->phys),
		 "xilinxps2/serio at %08llX", (unsigned long long)phys_addr);

	serio_register_port(serio);

	return 0;		/* success */

failed2:
	release_mem_region(phys_addr, remap_size);
failed1:
	kfree(drvdata);
	dev_set_drvdata(dev, NULL);

	return error;
}

/**
 * xps2_of_remove - unbinds the driver from the PS/2 device.
 * @of_dev:	pointer to OF device structure
 *
 * This function is called if a device is physically removed from the system or
 * if the driver module is being unloaded. It frees any resources allocated to
 * the device.
 */
static int __devexit xps2_of_remove(struct of_device *of_dev)
{
	struct device *dev = &of_dev->dev;
	struct xps2data *drvdata = dev_get_drvdata(dev);
	struct resource r_mem; /* IO mem resources */

	serio_unregister_port(&drvdata->serio);
	iounmap(drvdata->base_address);

	/* Get iospace of the device */
	if (of_address_to_resource(of_dev->node, 0, &r_mem))
		dev_err(dev, "invalid address\n");
	else
		release_mem_region(r_mem.start, r_mem.end - r_mem.start + 1);

	kfree(drvdata);

	dev_set_drvdata(dev, NULL);

	return 0;
}

/* Match table for of_platform binding */
static struct of_device_id xps2_of_match[] __devinitdata = {
	{ .compatible = "xlnx,xps-ps2-1.00.a", },
	{ /* end of list */ },
};
MODULE_DEVICE_TABLE(of, xps2_of_match);
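
/*
 * Note: a device tree node matching this driver is expected to look roughly
 * like the sketch below. Only the compatible string comes from the match
 * table above; the node name, register window and interrupt specifier are
 * placeholders that depend on the particular EDK hardware design.
 *
 *	ps2@a9000000 {
 *		compatible = "xlnx,xps-ps2-1.00.a";
 *		reg = <0xa9000000 0x1000>;
 *		interrupt-parent = <&xps_intc_0>;
 *		interrupts = <3 2>;
 *	};
 */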

static struct of_platform_driver xps2_of_driver = {
	.name		= DRIVER_NAME,
	.match_table	= xps2_of_match,
	.probe		= xps2_of_probe,
	.remove		= __devexit_p(xps2_of_remove),
};

static int __init xps2_init(void)
{
	return of_register_platform_driver(&xps2_of_driver);
}

static void __exit xps2_cleanup(void)
{
	of_unregister_platform_driver(&xps2_of_driver);
}

module_init(xps2_init);
module_exit(xps2_cleanup);

MODULE_AUTHOR("Xilinx, Inc.");
MODULE_DESCRIPTION("Xilinx XPS PS/2 driver");
MODULE_LICENSE("GPL");
break;
case NLM_LCK_DENIED_NOLOCKS:
@@ -802,9 +725,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
}
die:
- nlm_release_host(req->a_host);
- nlmclnt_release_lockargs(req);
- kfree(req);
return;
retry_cancel:
@@ -818,6 +738,7 @@ retry_cancel:
static const struct rpc_call_ops nlmclnt_cancel_ops = {
.rpc_call_done = nlmclnt_cancel_callback,
+ .rpc_release = nlmclnt_rpc_release,
};
/*
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b1d8a..112ebf8b8df 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
nlm_hosts[hash] = host;
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
+ INIT_LIST_HEAD(&host->h_granted);
+ INIT_LIST_HEAD(&host->h_reclaim);
if (++nrhosts > NLM_HOST_MAX)
next_gc = 0;
@@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host)
xprt->resvport = 1; /* NLM requires a reserved port */
/* Existing NLM servers accept AUTH_UNIX only */
- clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
+ clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
host->h_version, RPC_AUTH_UNIX);
if (IS_ERR(clnt))
goto forgetit;
clnt->cl_autobind = 1; /* turn on pmap queries */
+ clnt->cl_softrtry = 1; /* All queries are soft */
host->h_rpcclnt = clnt;
}
@@ -242,8 +245,12 @@ void nlm_release_host(struct nlm_host *host)
{
if (host != NULL) {
dprintk("lockd: release host %s\n", host->h_name);
- atomic_dec(&host->h_count);
BUG_ON(atomic_read(&host->h_count) < 0);
+ if (atomic_dec_and_test(&host->h_count)) {
+ BUG_ON(!list_empty(&host->h_lockowners));
+ BUG_ON(!list_empty(&host->h_granted));
+ BUG_ON(!list_empty(&host->h_reclaim));
+ }
}
}
@@ -331,7 +338,6 @@ nlm_gc_hosts(void)
rpc_destroy_client(host->h_rpcclnt);
}
}
- BUG_ON(!list_empty(&host->h_lockowners));
kfree(host);
nrhosts--;
}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a89cb8aa2c8..3fc683f46b3 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
struct rpc_clnt *clnt;
int status;
struct nsm_args args;
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = res,
+ };
clnt = nsm_create();
if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
args.proc = NLMPROC_NSM_NOTIFY;
memset(res, 0, sizeof(*res));
- status = rpc_call(clnt, proc, &args, res, 0);
+ msg.rpc_proc = &clnt->cl_procinfo[proc];
+ status = rpc_call_sync(clnt, &msg, 0);
if (status < 0)
printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
status);
@@ -214,12 +219,16 @@ static struct rpc_procinfo nsm_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_mon,
.p_decode = (kxdrproc_t) xdr_decode_stat_res,
.p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2,
+ .p_statidx = SM_MON,
+ .p_name = "MONITOR",
},
[SM_UNMON] = {
.p_proc = SM_UNMON,
.p_encode = (kxdrproc_t) xdr_encode_unmon,
.p_decode = (kxdrproc_t) xdr_decode_stat,
.p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
+ .p_statidx = SM_UNMON,
+ .p_name = "UNMONITOR",
},
};
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913aa06..a2dd9ccb9b3 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -21,10 +21,6 @@
#define NLMDBG_FACILITY NLMDBG_CLIENT
-static u32 nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlm4svc_callback_ops;
-
/*
* Obtain client and file from arguments
*/
@@ -234,83 +230,89 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
}
/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
+{
+ dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+ -task->tk_status);
+}
+
+static void nlm4svc_callback_release(void *data)
+{
+ nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+ .rpc_call_done = nlm4svc_callback_exit,
+ .rpc_release = nlm4svc_callback_release,
+};
+
+/*
* `Async' versions of the above service routines. They aren't really,
* because we send the callback before the reply proper. I hope this
* doesn't break any clients.
*/
-static int
-nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
- void *resp)
+static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+ int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
{
- struct nlm_res res;
- u32 stat;
+ struct nlm_host *host;
+ struct nlm_rqst *call;
+ int stat;
- dprintk("lockd: TEST_MSG called\n");
- memset(&res, 0, sizeof(res));
+ host = nlmsvc_lookup_host(rqstp);
+ if (host == NULL)
+ return rpc_system_err;
+
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return rpc_system_err;
- if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res);
- return stat;
+ stat = func(rqstp, argp, &call->a_res);
+ if (stat != 0) {
+ nlm_release_call(call);
+ return stat;
+ }
+
+ call->a_flags = RPC_TASK_ASYNC;
+ if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
+ return rpc_system_err;
+ return rpc_success;
}
-static int
-nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
+ dprintk("lockd: TEST_MSG called\n");
+ return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+}
+static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+ void *resp)
+{
dprintk("lockd: LOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
}
-static int
-nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: CANCEL_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
}
-static int
-nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: UNLOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
}
-static int
-nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: GRANTED_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
}
/*
@@ -472,55 +474,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
- struct nlm_host *host;
- struct nlm_rqst *call;
-
- if (!(call = nlmclnt_alloc_call()))
- return rpc_system_err;
-
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
- if (!host) {
- kfree(call);
- return rpc_system_err;
- }
-
- call->a_flags = RPC_TASK_ASYNC;
- call->a_host = host;
- memcpy(&call->a_args, resp, sizeof(*resp));
-
- if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
- goto error;
-
- return rpc_success;
- error:
- kfree(call);
- nlm_release_host(host);
- return rpc_system_err;
-}
-
-static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
-{
- struct nlm_rqst *call = data;
-
- if (task->tk_status < 0) {
- dprintk("lockd: %4d callback failed (errno = %d)\n",
- task->tk_pid, -task->tk_status);
- }
- nlm_release_host(call->a_host);
- kfree(call);
-}
-
-static const struct rpc_call_ops nlm4svc_callback_ops = {
- .rpc_call_done = nlm4svc_callback_exit,
-};
-
-/*
* NLM Server procedures.
*/
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9cfced65d4a..d2b66bad7d5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -39,9 +39,12 @@
#define nlm_deadlock nlm_lck_denied
#endif
+static void nlmsvc_release_block(struct nlm_block *block);
static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
static int nlmsvc_remove_block(struct nlm_block *block);
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
+static void nlmsvc_freegrantargs(struct nlm_rqst *call);
static const struct rpc_call_ops nlmsvc_grant_ops;
/*
@@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
struct nlm_block **bp, *b;
dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
+ kref_get(&block->b_count);
if (block->b_queued)
nlmsvc_remove_block(block);
bp = &nlm_blocked;
@@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *block)
if (b == block) {
*bp = block->b_next;
block->b_queued = 0;
+ nlmsvc_release_block(block);
return 1;
}
}
@@ -98,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block)
}
/*
- * Find a block for a given lock and optionally remove it from
- * the list.
+ * Find a block for a given lock
*/
static struct nlm_block *
-nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
+nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
{
struct nlm_block **head, *block;
struct file_lock *fl;
@@ -112,17 +116,14 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end, lock->fl.fl_type);
for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
- fl = &block->b_call.a_args.lock.fl;
+ fl = &block->b_call->a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
block->b_file, fl->fl_pid,
(long long)fl->fl_start,
(long long)fl->fl_end, fl->fl_type,
- nlmdbg_cookie2a(&block->b_call.a_args.cookie));
+ nlmdbg_cookie2a(&block->b_call->a_args.cookie));
if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
- if (remove) {
- *head = block->b_next;
- block->b_queued = 0;
- }
+ kref_get(&block->b_count);
return block;
}
}
@@ -150,11 +151,13 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
for (block = nlm_blocked; block; block = block->b_next) {
dprintk("cookie: head of blocked queue %p, block %p\n",
nlm_blocked, block);
- if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
+ if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
&& nlm_cmp_addr(sin, &block->b_host->h_addr))
break;
}
+ if (block != NULL)
+ kref_get(&block->b_count);
return block;
}
@@ -174,27 +177,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
{
struct nlm_block *block;
struct nlm_host *host;
- struct nlm_rqst *call;
+ struct nlm_rqst *call = NULL;
/* Create host handle for callback */
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
+ host = nlmsvc_lookup_host(rqstp);
if (host == NULL)
return NULL;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return NULL;
+
/* Allocate memory for block, and initialize arguments */
- if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (block == NULL)
goto failed;
- memset(block, 0, sizeof(*block));
- locks_init_lock(&block->b_call.a_args.lock.fl);
- locks_init_lock(&block->b_call.a_res.lock.fl);
+ kref_init(&block->b_count);
- if (!nlmclnt_setgrantargs(&block->b_call, lock))
+ if (!nlmsvc_setgrantargs(call, lock))
goto failed_free;
/* Set notifier function for VFS, and init args */
- block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
- block->b_call.a_args.cookie = *cookie; /* see above */
+ call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+ call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
+ call->a_args.cookie = *cookie; /* see above */
dprintk("lockd: created block %p...\n", block);
@@ -202,22 +208,23 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
block->b_daemon = rqstp->rq_server;
block->b_host = host;
block->b_file = file;
+ file->f_count++;
/* Add to file's list of blocks */
block->b_fnext = file->f_blocks;
file->f_blocks = block;
/* Set up RPC arguments for callback */
- call = &block->b_call;
- call->a_host = host;
+ block->b_call = call;
call->a_flags = RPC_TASK_ASYNC;
+ call->a_block = block;
return block;
failed_free:
kfree(block);
failed:
- nlm_release_host(host);
+ nlm_release_call(call);
return NULL;
}
@@ -227,29 +234,26 @@ failed:
* It is the caller's responsibility to check whether the file
* can be closed hereafter.
*/
-static int
-nlmsvc_delete_block(struct nlm_block *block, int unlock)
+static int nlmsvc_unlink_block(struct nlm_block *block)
{
- struct file_lock *fl = &block->b_call.a_args.lock.fl;
- struct nlm_file *file = block->b_file;
- struct nlm_block **bp;
- int status = 0;
-
- dprintk("lockd: deleting block %p...\n", block);
+ int status;
+ dprintk("lockd: unlinking block %p...\n", block);
/* Remove block from list */
+ status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
nlmsvc_remove_block(block);
- if (unlock)
- status = posix_unblock_lock(file->f_file, fl);
+ return status;
+}
- /* If the block is in the middle of a GRANT callback,
- * don't kill it yet. */
- if (block->b_incall) {
- nlmsvc_insert_block(block, NLM_NEVER);
- block->b_done = 1;
- return status;
- }
+static void nlmsvc_free_block(struct kref *kref)
+{
+ struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
+ struct nlm_file *file = block->b_file;
+ struct nlm_block **bp;
+ dprintk("lockd: freeing block %p...\n", block);
+
+ down(&file->f_sema);
/* Remove block from file's list of blocks */
for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
if (*bp == block) {
@@ -257,36 +261,93 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
break;
}
}
+ up(&file->f_sema);
- if (block->b_host)
- nlm_release_host(block->b_host);
- nlmclnt_freegrantargs(&block->b_call);
+ nlmsvc_freegrantargs(block->b_call);
+ nlm_release_call(block->b_call);
+ nlm_release_file(block->b_file);
kfree(block);
- return status;
+}
+
+static void nlmsvc_release_block(struct nlm_block *block)
+{
+ if (block != NULL)
+ kref_put(&block->b_count, nlmsvc_free_block);
+}
+
+static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
+{
+ struct nlm_block *block;
+
+ down(&file->f_sema);
+ for (block = file->f_blocks; block != NULL; block = block->b_fnext)
+ block->b_host->h_inuse = 1;
+ up(&file->f_sema);
+}
+
+static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
+{
+ struct nlm_block *block;
+
+restart:
+ down(&file->f_sema);
+ for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
+ if (host != NULL && host != block->b_host)
+ continue;
+ if (!block->b_queued)
+ continue;
+ kref_get(&block->b_count);
+ up(&file->f_sema);
+ nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ goto restart;
+ }
+ up(&file->f_sema);
}
/*
* Loop over all blocks and perform the action specified.
* (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
*/
-int
+void
nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
{
- struct nlm_block *block, *next;
- /* XXX: Will everything get cleaned up if we don't unlock here? */
+ if (action == NLM_ACT_MARK)
+ nlmsvc_act_mark(host, file);
+ else
+ nlmsvc_act_unlock(host, file);
+}
- down(&file->f_sema);
- for (block = file->f_blocks; block; block = next) {
- next = block->b_fnext;
- if (action == NLM_ACT_MARK)
- block->b_host->h_inuse = 1;
- else if (action == NLM_ACT_UNLOCK) {
- if (host == NULL || host == block->b_host)
- nlmsvc_delete_block(block, 1);
- }
+/*
+ * Initialize arguments for GRANTED call. The nlm_rqst structure
+ * has been cleared already.
+ */
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
+{
+ locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+ memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+ call->a_args.lock.caller = system_utsname.nodename;
+ call->a_args.lock.oh.len = lock->oh.len;
+
+ /* set default data area */
+ call->a_args.lock.oh.data = call->a_owner;
+ call->a_args.lock.svid = lock->fl.fl_pid;
+
+ if (lock->oh.len > NLMCLNT_OHSIZE) {
+ void *data = kmalloc(lock->oh.len, GFP_KERNEL);
+ if (!data)
+ return 0;
+ call->a_args.lock.oh.data = (u8 *) data;
}
- up(&file->f_sema);
- return 0;
+
+ memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
+ return 1;
+}
+
+static void nlmsvc_freegrantargs(struct nlm_rqst *call)
+{
+ if (call->a_args.lock.oh.data != call->a_owner)
+ kfree(call->a_args.lock.oh.data);
}
/*
@@ -297,9 +358,9 @@ u32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
{
- struct file_lock *conflock;
- struct nlm_block *block;
+ struct nlm_block *block, *newblock = NULL;
int error;
+ u32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -310,69 +371,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
wait);
- /* Get existing block (in case client is busy-waiting) */
- block = nlmsvc_lookup_block(file, lock, 0);
-
- lock->fl.fl_flags |= FL_LOCKD;
-
+ lock->fl.fl_flags &= ~FL_SLEEP;
again:
/* Lock file against concurrent access */
down(&file->f_sema);
+ /* Get existing block (in case client is busy-waiting) */
+ block = nlmsvc_lookup_block(file, lock);
+ if (block == NULL) {
+ if (newblock != NULL)
+ lock = &newblock->b_call->a_args.lock;
+ } else
+ lock = &block->b_call->a_args.lock;
- if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
- error = posix_lock_file(file->f_file, &lock->fl);
+ error = posix_lock_file(file->f_file, &lock->fl);
+ lock->fl.fl_flags &= ~FL_SLEEP;
- if (block)
- nlmsvc_delete_block(block, 0);
- up(&file->f_sema);
+ dprintk("lockd: posix_lock_file returned %d\n", error);
- dprintk("lockd: posix_lock_file returned %d\n", -error);
- switch(-error) {
+ switch(error) {
case 0:
- return nlm_granted;
- case EDEADLK:
- return nlm_deadlock;
- case EAGAIN:
- return nlm_lck_denied;
+ ret = nlm_granted;
+ goto out;
+ case -EAGAIN:
+ break;
+ case -EDEADLK:
+ ret = nlm_deadlock;
+ goto out;
default: /* includes ENOLCK */
- return nlm_lck_denied_nolocks;
- }
+ ret = nlm_lck_denied_nolocks;
+ goto out;
}
- if (!wait) {
- up(&file->f_sema);
- return nlm_lck_denied;
- }
+ ret = nlm_lck_denied;
+ if (!wait)
+ goto out;
- if (posix_locks_deadlock(&lock->fl, conflock)) {
- up(&file->f_sema);
- return nlm_deadlock;
- }
+ ret = nlm_lck_blocked;
+ if (block != NULL)
+ goto out;
/* If we don't have a block, create and initialize it. Then
* retry because we may have slept in kmalloc. */
/* We have to release f_sema as nlmsvc_create_block may try to
* to claim it while doing host garbage collection */
- if (block == NULL) {
+ if (newblock == NULL) {
up(&file->f_sema);
dprintk("lockd: blocking on this lock (allocating).\n");
- if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
+ if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
return nlm_lck_denied_nolocks;
goto again;
}
/* Append to list of blocked */
- nlmsvc_insert_block(block, NLM_NEVER);
-
- if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
- /* Now add block to block list of the conflicting lock
- if we haven't done so. */
- dprintk("lockd: blocking on this lock.\n");
- posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
- }
-
+ nlmsvc_insert_block(newblock, NLM_NEVER);
+out:
up(&file->f_sema);
- return nlm_lck_blocked;
+ nlmsvc_release_block(newblock);
+ nlmsvc_release_block(block);
+ dprintk("lockd: nlmsvc_lock returned %u\n", ret);
+ return ret;
}
/*
@@ -382,8 +439,6 @@ u32
nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
struct nlm_lock *conflock)
{
- struct file_lock *fl;
-
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
file->f_file->f_dentry->d_inode->i_ino,
@@ -391,13 +446,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
- if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
+ if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
- fl->fl_type, (long long)fl->fl_start,
- (long long)fl->fl_end);
+ conflock->fl.fl_type,
+ (long long)conflock->fl.fl_start,
+ (long long)conflock->fl.fl_end);
conflock->caller = "somehost"; /* FIXME */
conflock->oh.len = 0; /* don't return OH info */
- conflock->fl = *fl;
+ conflock->svid = conflock->fl.fl_pid;
return nlm_lck_denied;
}
@@ -453,9 +509,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
(long long)lock->fl.fl_end);
down(&file->f_sema);
- if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
- status = nlmsvc_delete_block(block, 1);
+ block = nlmsvc_lookup_block(file, lock);
up(&file->f_sema);
+ if (block != NULL) {
+ status = nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ }
return status ? nlm_lck_denied : nlm_granted;
}
@@ -473,7 +532,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
dprintk("lockd: VFS unblock notification for block %p\n", fl);
for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
- if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
+ if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
nlmsvc_insert_block(block, 0);
svc_wake_up(block->b_daemon);
return;
@@ -508,17 +567,13 @@ static void
nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
- struct nlm_lock *lock = &block->b_call.a_args.lock;
- struct file_lock *conflock;
+ struct nlm_lock *lock = &block->b_call->a_args.lock;
int error;
dprintk("lockd: grant blocked lock %p\n", block);
- /* First thing is lock the file */
- down(&file->f_sema);
-
/* Unlink block request from list */
- nlmsvc_remove_block(block);
+ nlmsvc_unlink_block(block);
/* If b_granted is true this means we've been here before.
* Just retry the grant callback, possibly refreshing the RPC
@@ -529,24 +584,21 @@ nlmsvc_grant_blocked(struct nlm_block *block)
}
/* Try the lock operation again */
- if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
- /* Bummer, we blocked again */
+ lock->fl.fl_flags |= FL_SLEEP;
+ error = posix_lock_file(file->f_file, &lock->fl);
+ lock->fl.fl_flags &= ~FL_SLEEP;
+
+ switch (error) {
+ case 0:
+ break;
+ case -EAGAIN:
dprintk("lockd: lock still blocked\n");
nlmsvc_insert_block(block, NLM_NEVER);
- posix_block_lock(conflock, &lock->fl);
- up(&file->f_sema);
return;
- }
-
- /* Alright, no conflicting lock. Now lock it for real. If the
- * following yields an error, this is most probably due to low
- * memory. Retry the lock in a few seconds.
- */
- if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
+ default:
printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
-error, __FUNCTION__);
nlmsvc_insert_block(block, 10 * HZ);
- up(&file->f_sema);
return;
}
@@ -554,17 +606,15 @@ callback:
/* Lock was granted by VFS. */
dprintk("lockd: GRANTing blocked lock.\n");
block->b_granted = 1;
- block->b_incall = 1;
/* Schedule next grant callback in 30 seconds */
nlmsvc_insert_block(block, 30 * HZ);
/* Call the client */
- nlm_get_host(block->b_call.a_host);
- if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+ kref_get(&block->b_count);
+ if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
&nlmsvc_grant_ops) < 0)
- nlm_release_host(block->b_call.a_host);
- up(&file->f_sema);
+ nlmsvc_release_block(block);
}
/*
@@ -578,20 +628,10 @@ callback:
static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
{
struct nlm_rqst *call = data;
- struct nlm_block *block;
+ struct nlm_block *block = call->a_block;
unsigned long timeout;
- struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client);
dprintk("lockd: GRANT_MSG RPC callback\n");
- dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
- nlmdbg_cookie2a(&call->a_args.cookie),
- NIPQUAD(peer_addr->sin_addr.s_addr));
- if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
- dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
- nlmdbg_cookie2a(&call->a_args.cookie),
- NIPQUAD(peer_addr->sin_addr.s_addr));
- return;
- }
/* Technically, we should down the file semaphore here. Since we
* move the block towards the head of the queue only, no harm
@@ -608,13 +648,18 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
}
nlmsvc_insert_block(block, timeout);
svc_wake_up(block->b_daemon);
- block->b_incall = 0;
+}
- nlm_release_host(call->a_host);
+void nlmsvc_grant_release(void *data)
+{
+ struct nlm_rqst *call = data;
+
+ nlmsvc_release_block(call->a_block);
}
static const struct rpc_call_ops nlmsvc_grant_ops = {
.rpc_call_done = nlmsvc_grant_callback,
+ .rpc_release = nlmsvc_grant_release,
};
/*
@@ -634,25 +679,17 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
return;
file = block->b_file;
- file->f_count++;
- down(&file->f_sema);
- block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
if (block) {
if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
/* Try again in a couple of seconds */
nlmsvc_insert_block(block, 10 * HZ);
- up(&file->f_sema);
} else {
/* Lock is now held by client, or has been rejected.
* In both cases, the block should be removed. */
- up(&file->f_sema);
- if (status == NLM_LCK_GRANTED)
- nlmsvc_delete_block(block, 0);
- else
- nlmsvc_delete_block(block, 1);
+ nlmsvc_unlink_block(block);
}
}
- nlm_release_file(file);
+ nlmsvc_release_block(block);
}
/*
@@ -675,10 +712,12 @@ nlmsvc_retry_blocked(void)
break;
dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
block, block->b_when, block->b_done);
+ kref_get(&block->b_count);
if (block->b_done)
- nlmsvc_delete_block(block, 0);
+ nlmsvc_unlink_block(block);
else
nlmsvc_grant_blocked(block);
+ nlmsvc_release_block(block);
}
if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 35681d9cf1f..d210cf304e9 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,10 +22,6 @@
#define NLMDBG_FACILITY NLMDBG_CLIENT
-static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlmsvc_callback_ops;
-
#ifdef CONFIG_LOCKD_V4
static u32
cast_to_nlm(u32 status, u32 vers)
@@ -262,83 +258,91 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
}
/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
+{
+ dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+ -task->tk_status);
+}
+
+static void nlmsvc_callback_release(void *data)
+{
+ nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+ .rpc_call_done = nlmsvc_callback_exit,
+ .rpc_release = nlmsvc_callback_release,
+};
+
+/*
* `Async' versions of the above service routines. They aren't really,
* because we send the callback before the reply proper. I hope this
* doesn't break any clients.
*/
-static int
-nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
- void *resp)
+static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+ int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
{
- struct nlm_res res;
- u32 stat;
+ struct nlm_host *host;
+ struct nlm_rqst *call;
+ int stat;
- dprintk("lockd: TEST_MSG called\n");
- memset(&res, 0, sizeof(res));
+ host = nlmsvc_lookup_host(rqstp);
+ if (host == NULL)
+ return rpc_system_err;
- if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res);
- return stat;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return rpc_system_err;
+
+ stat = func(rqstp, argp, &call->a_res);
+ if (stat != 0) {
+ nlm_release_call(call);
+ return stat;
+ }
+
+ call->a_flags = RPC_TASK_ASYNC;
+ if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
+ return rpc_system_err;
+ return rpc_success;
}
-static int
-nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
+ dprintk("lockd: TEST_MSG called\n");
+ return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+}
+static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+ void *resp)
+{
dprintk("lockd: LOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
}
-static int
-nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: CANCEL_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
}
static int
nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: UNLOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
}
static int
nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: GRANTED_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
}
/*
@@ -497,55 +501,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
}
/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
- struct nlm_host *host;
- struct nlm_rqst *call;
-
- if (!(call = nlmclnt_alloc_call()))
- return rpc_system_err;
-
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
- if (!host) {
- kfree(call);
- return rpc_system_err;
- }
-
- call->a_flags = RPC_TASK_ASYNC;
- call->a_host = host;
- memcpy(&call->a_args, resp, sizeof(*resp));
-
- if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
- goto error;
-
- return rpc_success;
- error:
- nlm_release_host(host);
- kfree(call);
- return rpc_system_err;
-}
-
-static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-{
- struct nlm_rqst *call = data;
-
- if (task->tk_status < 0) {
- dprintk("lockd: %4d callback failed (errno = %d)\n",
- task->tk_pid, -task->tk_status);
- }
- nlm_release_host(call->a_host);
- kfree(call);
-}
-
-static const struct rpc_call_ops nlmsvc_callback_ops = {
- .rpc_call_done = nlmsvc_callback_exit,
-};
-
-/*
* NLM Server procedures.
*/
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 4943fb7836c..27288c83da9 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
* Traverse all shares for a given file (and host).
* NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
*/
-int
+void
nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
{
struct nlm_share *share, **shpp;
@@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
}
shpp = &share->s_next;
}
-
- return 0;
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 62f4a385177..c7a6e3ae44d 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
again:
file->f_locks = 0;
for (fl = inode->i_flock; fl; fl = fl->fl_next) {
- if (!(fl->fl_flags & FL_LOCKD))
+ if (fl->fl_lmops != &nlmsvc_lock_operations)
continue;
/* update current lock count */
@@ -224,9 +224,8 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
if (file->f_count || file->f_blocks || file->f_shares)
return 1;
} else {
- if (nlmsvc_traverse_blocks(host, file, action)
- || nlmsvc_traverse_shares(host, file, action))
- return 1;
+ nlmsvc_traverse_blocks(host, file, action);
+ nlmsvc_traverse_shares(host, file, action);
}
return nlm_traverse_locks(host, file, action);
}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 1d700a4dd0b..f22a3764461 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
|| !(p = nlm_decode_fh(p, &lock->fh))
|| !(p = nlm_decode_oh(p, &lock->oh)))
return NULL;
+ lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = ntohl(*p++);
+ fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
else
len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(lock->svid);
*p++ = htonl(start);
*p++ = htonl(len);
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
struct file_lock *fl = &resp->lock.fl;
*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(resp->lock.svid);
/* Encode owner handle. */
if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
- lock->fl.fl_pid = ~(u32) 0;
+ lock->svid = ~(u32) 0;
+ lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
memset(&resp->lock, 0, sizeof(resp->lock));
locks_init_lock(fl);
excl = ntohl(*p++);
- fl->fl_pid = ntohl(*p++);
+ resp->lock.svid = ntohl(*p++);
+ fl->fl_pid = (pid_t)resp->lock.svid;
if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
return -EIO;
@@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
.p_proc = NLMPROC_##proc, \
.p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
- .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \
+ .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nlm_procedures[] = {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index fdcf105a530..36eb175ec33 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
|| !(p = nlm4_decode_fh(p, &lock->fh))
|| !(p = nlm4_decode_oh(p, &lock->oh)))
return NULL;
+ lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = ntohl(*p++);
+ fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
|| (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
return NULL;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(lock->svid);
start = loff_t_to_s64(fl->fl_start);
if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
struct file_lock *fl = &resp->lock.fl;
*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(resp->lock.svid);
/* Encode owner handle. */
if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
p = xdr_encode_hyper(p, start);
p = xdr_encode_hyper(p, len);
- dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n",
- resp->status, fl->fl_pid, fl->fl_type,
+ dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+ resp->status, (int)resp->lock.svid, fl->fl_type,
(long long)fl->fl_start, (long long)fl->fl_end);
}
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
- lock->fl.fl_pid = ~(u32) 0;
+ lock->svid = ~(u32) 0;
+ lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
memset(&resp->lock, 0, sizeof(resp->lock));
locks_init_lock(fl);
excl = ntohl(*p++);
- fl->fl_pid = ntohl(*p++);
+ resp->lock.svid = ntohl(*p++);
+ fl->fl_pid = (pid_t)resp->lock.svid;
if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
return -EIO;
@@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
.p_proc = NLMPROC_##proc, \
.p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
- .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \
+ .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nlm4_procedures[] = {
diff --git a/fs/locks.c b/fs/locks.c
index 909eab8fb1d..56f996e98bb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,10 +139,7 @@ int lease_break_time = 45;
#define for_each_lock(inode, lockp) \
for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
-LIST_HEAD(file_lock_list);
-
-EXPORT_SYMBOL(file_lock_list);
-
+static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list);
static kmem_cache_t *filelock_cache;
@@ -153,6 +150,21 @@ static struct file_lock *locks_alloc_lock(void)
return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
}
+static void locks_release_private(struct file_lock *fl)
+{
+ if (fl->fl_ops) {
+ if (fl->fl_ops->fl_release_private)
+ fl->fl_ops->fl_release_private(fl);
+ fl->fl_ops = NULL;
+ }
+ if (fl->fl_lmops) {
+ if (fl->fl_lmops->fl_release_private)
+ fl->fl_lmops->fl_release_private(fl);
+ fl->fl_lmops = NULL;
+ }
+
+}
+
/* Free a lock which is not in use. */
static void locks_free_lock(struct file_lock *fl)
{
@@ -169,18 +181,7 @@ static void locks_free_lock(struct file_lock *fl)
if (!list_empty(&fl->fl_link))
panic("Attempting to free lock on active lock list");
- if (fl->fl_ops) {
- if (fl->fl_ops->fl_release_private)
- fl->fl_ops->fl_release_private(fl);
- fl->fl_ops = NULL;
- }
-
- if (fl->fl_lmops) {
- if (fl->fl_lmops->fl_release_private)
- fl->fl_lmops->fl_release_private(fl);
- fl->fl_lmops = NULL;
- }
-
+ locks_release_private(fl);
kmem_cache_free(filelock_cache, fl);
}
@@ -218,24 +219,46 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
locks_init_lock(lock);
}
+static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
+{
+ if (fl->fl_ops) {
+ if (fl->fl_ops->fl_copy_lock)
+ fl->fl_ops->fl_copy_lock(new, fl);
+ new->fl_ops = fl->fl_ops;
+ }
+ if (fl->fl_lmops) {
+ if (fl->fl_lmops->fl_copy_lock)
+ fl->fl_lmops->fl_copy_lock(new, fl);
+ new->fl_lmops = fl->fl_lmops;
+ }
+}
+
/*
* Initialize a new lock from an existing file_lock structure.
*/
-void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
{
new->fl_owner = fl->fl_owner;
new->fl_pid = fl->fl_pid;
- new->fl_file = fl->fl_file;
+ new->fl_file = NULL;
new->fl_flags = fl->fl_flags;
new->fl_type = fl->fl_type;
new->fl_start = fl->fl_start;
new->fl_end = fl->fl_end;
+ new->fl_ops = NULL;
+ new->fl_lmops = NULL;
+}
+
+void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+ locks_release_private(new);
+
+ __locks_copy_lock(new, fl);
+ new->fl_file = fl->fl_file;
new->fl_ops = fl->fl_ops;
new->fl_lmops = fl->fl_lmops;
- if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
- fl->fl_ops->fl_copy_lock(new, fl);
- if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
- fl->fl_lmops->fl_copy_lock(new, fl);
+
+ locks_copy_private(new, fl);
}
EXPORT_SYMBOL(locks_copy_lock);
@@ -654,8 +677,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
return result;
}
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
+int
+posix_test_lock(struct file *filp, struct file_lock *fl,
+ struct file_lock *conflock)
{
struct file_lock *cfl;
@@ -666,9 +690,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
if (posix_locks_conflict(cfl, fl))
break;
}
+ if (cfl) {
+ __locks_copy_lock(conflock, cfl);
+ unlock_kernel();
+ return 1;
+ }
unlock_kernel();
-
- return (cfl);
+ return 0;
}
EXPORT_SYMBOL(posix_test_lock);
@@ -904,7 +932,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
fl->fl_start = request->fl_start;
fl->fl_end = request->fl_end;
fl->fl_type = request->fl_type;
- fl->fl_u = request->fl_u;
+ locks_release_private(fl);
+ locks_copy_private(fl, request);
request = fl;
added = 1;
}
@@ -1544,7 +1573,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
*/
int fcntl_getlk(struct file *filp, struct flock __user *l)
{
- struct file_lock *fl, file_lock;
+ struct file_lock *fl, cfl, file_lock;
struct flock flock;
int error;
@@ -1568,7 +1597,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
- fl = posix_test_lock(filp, &file_lock);
+ fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
}
flock.l_type = F_UNLCK;
@@ -1698,7 +1727,7 @@ out:
*/
int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
{
- struct file_lock *fl, file_lock;
+ struct file_lock *fl, cfl, file_lock;
struct flock64 flock;
int error;
@@ -1722,7 +1751,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
- fl = posix_test_lock(filp, &file_lock);
+ fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
}
flock.l_type = F_UNLCK;
@@ -1936,21 +1965,6 @@ void locks_remove_flock(struct file *filp)
}
/**
- * posix_block_lock - blocks waiting for a file lock
- * @blocker: the lock which is blocking
- * @waiter: the lock which conflicts and has to wait
- *
- * lockd needs to block waiting for locks.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
- locks_insert_block(blocker, waiter);
-}
-
-EXPORT_SYMBOL(posix_block_lock);
-
-/**
* posix_unblock_lock - stop waiting for a file lock
* @filp: how the file was opened
* @waiter: the lock which was waiting
diff --git a/fs/namespace.c b/fs/namespace.c
index 39c81a8d631..71e75bcf4d2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
.show = show_vfsmnt
};
+static int show_vfsstat(struct seq_file *m, void *v)
+{
+ struct vfsmount *mnt = v;
+ int err = 0;
+
+ /* device */
+ if (mnt->mnt_devname) {
+ seq_puts(m, "device ");
+ mangle(m, mnt->mnt_devname);
+ } else
+ seq_puts(m, "no device");
+
+ /* mount point */
+ seq_puts(m, " mounted on ");
+ seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+ seq_putc(m, ' ');
+
+ /* file system type */
+ seq_puts(m, "with fstype ");
+ mangle(m, mnt->mnt_sb->s_type->name);
+
+ /* optional statistics */
+ if (mnt->mnt_sb->s_op->show_stats) {
+ seq_putc(m, ' ');
+ err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+ }
+
+ seq_putc(m, '\n');
+ return err;
+}
+
+struct seq_operations mountstats_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_vfsstat,
+};
+
/**
* may_umount_tree - check if a mount tree is busy
* @mnt: root of mount tree
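The show_vfsstat() routine added above emits one line per mount. Going by its
seq_puts()/mangle() calls, the output would look roughly like the lines below
(device names, mount points and the trailing per-filesystem statistics are
illustrative; the stats only appear when the superblock provides ->show_stats):

	device /dev/hda1 mounted on / with fstype ext3
	no device mounted on /proc with fstype proc
	device server:/export mounted on /mnt/nfs with fstype nfs <output of ->show_stats>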
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fcd97406a77..99d2cfbce86 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
complete(&nfs_callback_info.started);
- while (nfs_callback_info.users != 0 || !signalled()) {
+ for(;;) {
+ if (signalled()) {
+ if (nfs_callback_info.users == 0)
+ break;
+ flush_signals(current);
+ }
/*
* Listen for a request on the socket
*/
@@ -73,6 +78,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
svc_process(serv, rqstp);
}
+ svc_exit_thread(rqstp);
nfs_callback_info.pid = 0;
complete(&nfs_callback_info.stopped);
unlock_kernel();
@@ -134,11 +140,13 @@ int nfs_callback_down(void)
lock_kernel();
down(&nfs_callback_sema);
- if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
- goto out;
- kill_proc(nfs_callback_info.pid, SIGKILL, 1);
- wait_for_completion(&nfs_callback_info.stopped);
-out:
+ nfs_callback_info.users--;
+ do {
+ if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
+ break;
+ if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
+ break;
+ } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
up(&nfs_callback_sema);
unlock_kernel();
return ret;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a81a9..05c38cf40b6 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
{
- uint32_t *savep;
+ uint32_t *savep = NULL;
unsigned status = res->status;
if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
struct xdr_stream *xdr_out, void *resp)
{
- struct callback_op *op;
- unsigned int op_nr;
+ struct callback_op *op = &callback_ops[0];
+ unsigned int op_nr = OP_CB_ILLEGAL;
unsigned int status = 0;
long maxlen;
unsigned res;
dprintk("%s: start\n", __FUNCTION__);
status = decode_op_hdr(xdr_in, &op_nr);
- if (unlikely(status != 0)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- status = htonl(NFS4ERR_OP_ILLEGAL);
- } else
- op = &callback_ops[op_nr];
+ if (likely(status == 0)) {
+ switch (op_nr) {
+ case OP_CB_GETATTR:
+ case OP_CB_RECALL:
+ op = &callback_ops[op_nr];
+ break;
+ default:
+ op_nr = OP_CB_ILLEGAL;
+ op = &callback_ops[0];
+ status = htonl(NFS4ERR_OP_ILLEGAL);
+ }
+ }
maxlen = xdr_out->end - xdr_out->p;
if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
decode_compound_hdr_arg(&xdr_in, &hdr_arg);
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
+ hdr_res.nops = NULL;
encode_compound_hdr_res(&xdr_out, &hdr_res);
for (;;) {
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c6f07c1c71e..d3be923d4e4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
nfs_free_delegation(delegation);
}
}
+
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+ int res = 0;
+
+ if (nfsi->delegation_state == 0)
+ return 0;
+ spin_lock(&clp->cl_lock);
+ delegation = nfsi->delegation;
+ if (delegation != NULL) {
+ memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+ res = 1;
+ }
+ spin_unlock(&clp->cl_lock);
+ return res;
+}
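The new nfs4_copy_delegation_stateid() returns 1 and fills in the stateid only
while a delegation is actually cached for the inode, taking cl_lock so the
delegation cannot be reaped mid-copy. A hypothetical call site might use it to
prefer the delegation stateid and fall back otherwise (sketch only, not taken
from the patch):

	nfs4_stateid stateid;

	if (nfs4_copy_delegation_stateid(&stateid, inode)) {
		/* use the delegated stateid for this operation */
	} else {
		/* fall back to the regular open or lock stateid */
	}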
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 7a0b2bfce77..3858694652f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554bead69..06c48b385c9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFS_PARANOIA 1
/* #define NFS_DEBUG_VERBOSE 1 */
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
{
int res = 0;
+ dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+ inode->i_sb->s_id, inode->i_ino);
+
lock_kernel();
/* Call generic open code in order to cache credentials */
if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
unsigned long timestamp;
int error;
- dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+ dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+ __FUNCTION__, (long long)desc->entry->cookie,
+ page->index);
again:
timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
status;
while((status = dir_decode(desc)) == 0) {
- dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+ __FUNCTION__, (unsigned long long)entry->cookie);
if (entry->prev_cookie == *desc->dir_cookie)
break;
if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
return status;
}
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
if (status)
break;
- dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+ dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+ (unsigned long long)entry->cookie, desc->current_index);
if (desc->file->f_pos == desc->current_index) {
*desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
return status;
}
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
struct page *page;
int status;
- dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+ dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+ __FUNCTION__, desc->page_index,
+ (long long) *desc->dir_cookie);
page = read_cache_page(inode->i_mapping, desc->page_index,
(filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
if (status < 0)
dir_page_release(desc);
out:
- dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
return status;
read_error:
page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
/* Always search-by-index from the beginning of the cache */
if (*desc->dir_cookie == 0) {
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+ (long long)desc->file->f_pos);
desc->page_index = 0;
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
desc->current_index = 0;
} else
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
for (;;) {
res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
return res;
}
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
int loop_count = 0,
res;
- dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+ (unsigned long long)entry->cookie);
for(;;) {
unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
dir_page_release(desc);
if (dentry != NULL)
dput(dentry);
- dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+ (unsigned long long)*desc->dir_cookie, res);
return res;
}
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
struct page *page = NULL;
int status;
- dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
page = alloc_page(GFP_HIGHUSER);
if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
out:
- dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+ __FUNCTION__, status);
return status;
out_release:
dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct nfs_fattr fattr;
long res;
+ dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (long long)filp->f_pos);
+ nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
lock_kernel();
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
}
unlock_kernel();
- if (res < 0)
- return res;
- return 0;
+ if (res > 0)
+ res = 0;
+ dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ res);
+ return res;
}
loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
*/
int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
{
+ dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ datasync);
+
return 0;
}
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
parent = dget_parent(dentry);
lock_kernel();
dir = parent->d_inode;
+ nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
}
if (is_bad_inode(inode)) {
- dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
goto out_bad;
}
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
out_valid:
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
d_drop(dentry);
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 0;
}
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
+ nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
res = ERR_PTR(-ENAMETOOLONG);
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
res = ERR_PTR(error);
goto out_unlock;
}
- res = ERR_PTR(-EACCES);
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
- if (!inode)
+ res = (struct dentry *)inode;
+ if (IS_ERR(res))
goto out_unlock;
no_entry:
res = d_add_unique(dentry, inode);
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
struct dentry *res = NULL;
int error;
+ dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
/* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd))
goto no_open;
@@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
return NULL;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
- if (!inode) {
+ if (IS_ERR(inode)) {
dput(dentry);
return NULL;
}
@@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
if (error < 0)
goto out_err;
}
- error = -ENOMEM;
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
- if (inode == NULL)
+ error = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out_err;
d_instantiate(dentry, inode);
return 0;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
int error;
int open_flags = 0;
- dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
struct iattr attr;
int status;
- dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct iattr attr;
int error;
- dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_valid = ATTR_MODE;
attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
lock_kernel();
nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
atomic_read(&dentry->d_count));
+ nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
#ifdef NFS_PARANOIA
if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
sillycounter++;
sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
- dfprintk(VFS, "trying to rename %s to %s\n",
- dentry->d_name.name, silly);
+ dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+ dentry->d_name.name, silly);
sdentry = lookup_one_len(silly, dentry->d_parent, slen);
/*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
struct rpc_cred *cred;
int res = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
if (mask == 0)
goto out;
/* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
res = PTR_ERR(cred);
unlock_kernel();
out:
+ dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+ inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
res = generic_permission(inode, mask, NULL);
unlock_kernel();
- return res;
+ goto out;
}
/*
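Several dir.c hunks above (nfs_lookup, nfs_readdir_lookup, nfs_instantiate)
switch from treating a NULL return of nfs_fhget() as failure to the ERR_PTR
convention, so the real error code reaches the caller instead of a hard-coded
-EACCES or -ENOMEM. The pattern the callers now follow is roughly this sketch:

	struct inode *inode;

	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
	if (IS_ERR(inode))
		return PTR_ERR(inode);	/* propagate the real error */
	/* ... instantiate the dentry with a valid inode ... */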
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ae2f3b33fe..0f583cb16dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
- * (multiple copies of the same instance running on separate hosts)
+ * (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
- * system cache protocols. Applications that process datasets
- * considerably larger than the client's memory do not always benefit
- * from a local cache. A streaming video server, for instance, has no
+ * system cache protocols. Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
+ * 04 May 2005 support O_DIRECT with aio --cel
*
*/
@@ -54,10 +55,10 @@
#include <asm/uaccess.h>
#include <asm/atomic.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
static kmem_cache_t *nfs_direct_cachep;
/*
@@ -65,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
- struct list_head list; /* nfs_read_data structs */
- wait_queue_head_t wait; /* wait for i/o completion */
+
+ /* I/O parameters */
+ struct list_head list, /* nfs_read/write_data structs */
+ rewrite_list; /* saved nfs_write_data structs */
+ struct nfs_open_context *ctx; /* file open context info */
+ struct kiocb * iocb; /* controlling i/o request */
+ struct inode * inode; /* target file of i/o */
+ unsigned long user_addr; /* location of user's buffer */
+ size_t user_count; /* total bytes to move */
+ loff_t pos; /* starting offset in file */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
- atomic_t complete, /* i/os we're waiting for */
- count, /* bytes actually processed */
+
+ /* completion state */
+ spinlock_t lock; /* protect completion state */
+ int outstanding; /* i/os we're waiting for */
+ ssize_t count, /* bytes actually processed */
error; /* any reported error */
+ struct completion completion; /* wait for i/o completion */
+
+ /* commit state */
+ struct nfs_write_data * commit_data; /* special write_data for commits */
+ int flags;
+#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
+ struct nfs_writeverf verf; /* unstable write verifier */
};
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
/**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O. However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
*/
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
- struct page ***pages)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+ struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+ dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+ dentry->d_name.name, (long long) pos, nr_segs);
+
+ return -EINVAL;
+}
+
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+ int i;
+ for (i = 0; i < npages; i++) {
+ struct page *page = pages[i];
+ if (do_dirty && !PageCompound(page))
+ set_page_dirty_lock(page);
+ page_cache_release(page);
+ }
+ kfree(pages);
+}
+
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
{
int result = -ENOMEM;
unsigned long page_count;
size_t array_size;
- /* set an arbitrary limit to prevent type overflow */
- /* XXX: this can probably be as large as INT_MAX */
- if (size > MAX_DIRECTIO_SIZE) {
- *pages = NULL;
- return -EFBIG;
- }
-
page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
page_count -= user_addr >> PAGE_SHIFT;
@@ -108,75 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
page_count, (rw == READ), 0,
*pages, NULL);
up_read(&current->mm->mmap_sem);
- /*
- * If we got fewer pages than expected from get_user_pages(),
- * the user buffer runs off the end of a mapping; return EFAULT.
- */
- if (result >= 0 && result < page_count) {
- nfs_free_user_pages(*pages, result, 0);
+ if (result != page_count) {
+ /*
+ * If we got fewer pages than expected from
+ * get_user_pages(), the user buffer runs off the
+ * end of a mapping; return EFAULT.
+ */
+ if (result >= 0) {
+ nfs_free_user_pages(*pages, result, 0);
+ result = -EFAULT;
+ } else
+ kfree(*pages);
*pages = NULL;
- result = -EFAULT;
}
}
return result;
}
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
- int i;
- for (i = 0; i < npages; i++) {
- struct page *page = pages[i];
- if (do_dirty && !PageCompound(page))
- set_page_dirty_lock(page);
- page_cache_release(page);
- }
- kfree(pages);
+ struct nfs_direct_req *dreq;
+
+ dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ if (!dreq)
+ return NULL;
+
+ kref_init(&dreq->kref);
+ init_completion(&dreq->completion);
+ INIT_LIST_HEAD(&dreq->list);
+ INIT_LIST_HEAD(&dreq->rewrite_list);
+ dreq->iocb = NULL;
+ dreq->ctx = NULL;
+ spin_lock_init(&dreq->lock);
+ dreq->outstanding = 0;
+ dreq->count = 0;
+ dreq->error = 0;
+ dreq->flags = 0;
+
+ return dreq;
}
-/**
- * nfs_direct_req_release - release nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
static void nfs_direct_req_release(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+ if (dreq->ctx != NULL)
+ put_nfs_open_context(dreq->ctx);
kmem_cache_free(nfs_direct_cachep, dreq);
}
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+ ssize_t result = -EIOCBQUEUED;
+
+ /* Async requests don't wait here */
+ if (dreq->iocb)
+ goto out;
+
+ result = wait_for_completion_interruptible(&dreq->completion);
+
+ if (!result)
+ result = dreq->error;
+ if (!result)
+ result = dreq->count;
+
+out:
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return (ssize_t) result;
+}
+
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete. This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
*
+ * In addition, synchronous I/O uses a stack-allocated iocb. Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request. If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+ nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+ if (dreq->iocb) {
+ long res = (long) dreq->error;
+ if (!res)
+ res = (long) dreq->count;
+ aio_complete(dreq->iocb, res, 0);
+ }
+ complete_all(&dreq->completion);
+
+ kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/*
* Note we also set the number of requests we have in the dreq when we are
* done. This prevents races with I/O completion so we will always wait
* until all requests have been dispatched and completed.
*/
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
- unsigned int reads = 0;
unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ dreq = nfs_direct_req_alloc();
if (!dreq)
return NULL;
- kref_init(&dreq->kref);
- init_waitqueue_head(&dreq->wait);
- INIT_LIST_HEAD(&dreq->list);
- atomic_set(&dreq->count, 0);
- atomic_set(&dreq->error, 0);
-
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -196,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
list_add(&data->pages, list);
data->req = (struct nfs_page *) dreq;
- reads++;
+ dreq->outstanding++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
- atomic_set(&dreq->complete, reads);
return dreq;
}
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- if (likely(status >= 0))
- atomic_add(data->res.count, &dreq->count);
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+
+ spin_lock(&dreq->lock);
+
+ if (likely(task->tk_status >= 0))
+ dreq->count += data->res.count;
else
- atomic_set(&dreq->error, status);
+ dreq->error = task->tk_status;
- if (unlikely(atomic_dec_and_test(&dreq->complete))) {
- nfs_free_user_pages(dreq->pages, dreq->npages, 1);
- wake_up(&dreq->wait);
- kref_put(&dreq->kref, nfs_direct_req_release);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+
+ spin_unlock(&dreq->lock);
+ nfs_direct_complete(dreq);
}
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+static const struct rpc_call_ops nfs_read_direct_ops = {
+ .rpc_call_done = nfs_direct_read_result,
+ .rpc_release = nfs_readdata_release,
+};
+
+/*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
- struct inode *inode, struct nfs_open_context *ctx,
- unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t rsize = NFS_SERVER(inode)->rsize;
unsigned int curpage, pgbase;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
curpage = 0;
- pgbase = user_addr & ~PAGE_MASK;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
- unsigned int bytes;
+ size_t bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
+ BUG_ON(list_empty(list));
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
@@ -269,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
- data->args.offset = file_offset;
+ data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
@@ -277,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->res.eof = 0;
data->res.count = bytes;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_read_direct_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
- data->complete = nfs_direct_read_result;
lock_kernel();
rpc_execute(&data->task);
unlock_kernel();
- dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
- file_offset += bytes;
+ pos += bytes;
pgbase += bytes;
curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
count -= bytes;
} while (count != 0);
+ BUG_ON(!list_empty(list));
}
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
- int result = 0;
-
- if (intr) {
- result = wait_event_interruptible(dreq->wait,
- (atomic_read(&dreq->complete) == 0));
- } else {
- wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
- }
-
- if (!result)
- result = atomic_read(&dreq->error);
- if (!result)
- result = atomic_read(&dreq->count);
-
- kref_put(&dreq->kref, nfs_direct_req_release);
- return (ssize_t) result;
-}
-
-/**
- * nfs_direct_read_seg - Read in one iov segment. Generate separate
- * read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
{
ssize_t result;
sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_direct_req *dreq;
@@ -355,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
if (!dreq)
return -ENOMEM;
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
dreq->pages = pages;
dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+ nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
rpc_clnt_sigmask(clnt, &oldset);
- nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
- file_offset);
- result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+ nfs_direct_read_schedule(dreq);
+ result = nfs_direct_wait(dreq);
rpc_clnt_sigunmask(clnt, &oldset);
return result;
}
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- * then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
- const struct iovec *iov, loff_t file_offset,
- unsigned long nr_segs)
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ while (!list_empty(&dreq->list)) {
+ struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_release(data);
+ }
+}
- result = nfs_direct_read_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+ struct list_head *pos;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- return result;
- }
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ list_for_each(pos, &dreq->list)
+ dreq->outstanding++;
+ dreq->count = 0;
+
+ nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+ if (unlikely(task->tk_status < 0)) {
+ dreq->error = task->tk_status;
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
+ if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
- return tot_bytes;
+ dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+ nfs_direct_write_complete(dreq, data->inode);
}
-/**
- * nfs_direct_write_seg - Write out one iov segment. Generate separate
- * write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- int nr_pages)
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+ .rpc_call_done = nfs_direct_commit_result,
+ .rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- const unsigned int wsize = NFS_SERVER(inode)->wsize;
- size_t request;
- int curpage, need_commit;
- ssize_t result, tot_bytes;
- struct nfs_writeverf first_verf;
- struct nfs_write_data *wdata;
-
- wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
- if (!wdata)
- return -ENOMEM;
+ struct nfs_write_data *data = dreq->commit_data;
+ struct rpc_task *task = &data->task;
- wdata->inode = inode;
- wdata->cred = ctx->cred;
- wdata->args.fh = NFS_FH(inode);
- wdata->args.context = ctx;
- wdata->args.stable = NFS_UNSTABLE;
- if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
- wdata->args.stable = NFS_FILE_SYNC;
- wdata->res.fattr = &wdata->fattr;
- wdata->res.verf = &wdata->verf;
+ data->inode = dreq->inode;
+ data->cred = dreq->ctx->cred;
- nfs_begin_data_update(inode);
-retry:
- need_commit = 0;
- tot_bytes = 0;
- curpage = 0;
- request = count;
- wdata->args.pgbase = user_addr & ~PAGE_MASK;
- wdata->args.offset = file_offset;
- do {
- wdata->args.count = request;
- if (wdata->args.count > wsize)
- wdata->args.count = wsize;
- wdata->args.pages = &pages[curpage];
+ data->args.fh = NFS_FH(data->inode);
+ data->args.offset = dreq->pos;
+ data->args.count = dreq->user_count;
+ data->res.count = 0;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
- dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
- wdata->args.count, (long long) wdata->args.offset,
- user_addr + tot_bytes, wdata->args.pgbase, curpage);
+ rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+ &nfs_commit_direct_ops, data);
+ NFS_PROTO(data->inode)->commit_setup(data, 0);
- lock_kernel();
- result = NFS_PROTO(inode)->write(wdata);
- unlock_kernel();
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long)data->inode;
+ /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+ dreq->commit_data = NULL;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- goto out;
- }
+ dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
- if (tot_bytes == 0)
- memcpy(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier));
- if (wdata->verf.committed != NFS_FILE_SYNC) {
- need_commit = 1;
- if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier)))
- goto sync_retry;
- }
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+}
- tot_bytes += result;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ int flags = dreq->flags;
- /* in case of a short write: stop now, let the app recover */
- if (result < wdata->args.count)
+ dreq->flags = 0;
+ switch (flags) {
+ case NFS_ODIRECT_DO_COMMIT:
+ nfs_direct_commit_schedule(dreq);
break;
+ case NFS_ODIRECT_RESCHED_WRITES:
+ nfs_direct_write_reschedule(dreq);
+ break;
+ default:
+ nfs_end_data_update(inode);
+ if (dreq->commit_data != NULL)
+ nfs_commit_free(dreq->commit_data);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+ }
+}
- wdata->args.offset += result;
- wdata->args.pgbase += result;
- curpage += wdata->args.pgbase >> PAGE_SHIFT;
- wdata->args.pgbase &= ~PAGE_MASK;
- request -= result;
- } while (request != 0);
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = nfs_commit_alloc(0);
+ if (dreq->commit_data != NULL)
+ dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = NULL;
+}
- /*
- * Commit data written so far, even in the event of an error
- */
- if (need_commit) {
- wdata->args.count = tot_bytes;
- wdata->args.offset = file_offset;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ nfs_end_data_update(inode);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+}
+#endif
- lock_kernel();
- result = NFS_PROTO(inode)->commit(wdata);
- unlock_kernel();
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+ struct list_head *list;
+ struct nfs_direct_req *dreq;
+ unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ return NULL;
+
+ list = &dreq->list;
+ for(;;) {
+ struct nfs_write_data *data = nfs_writedata_alloc(wpages);
- if (result < 0 || memcmp(&first_verf.verifier,
- &wdata->verf.verifier,
- sizeof(first_verf.verifier)) != 0)
- goto sync_retry;
+ if (unlikely(!data)) {
+ while (!list_empty(list)) {
+ data = list_entry(list->next,
+ struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_free(data);
+ }
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&data->pages);
+ list_add(&data->pages, list);
+
+ data->req = (struct nfs_page *) dreq;
+ dreq->outstanding++;
+ if (nbytes <= wsize)
+ break;
+ nbytes -= wsize;
}
- result = tot_bytes;
-out:
- nfs_end_data_update(inode);
- nfs_writedata_free(wdata);
- return result;
+ nfs_alloc_commit_data(dreq);
-sync_retry:
- wdata->args.stable = NFS_FILE_SYNC;
- goto retry;
+ kref_get(&dreq->kref);
+ return dreq;
}
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- * then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode,
- struct nfs_open_context *ctx, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ int status = task->tk_status;
+
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
+ spin_lock(&dreq->lock);
- result = nfs_direct_write_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
- nfs_free_user_pages(pages, page_count, 0);
+ if (likely(status >= 0))
+ dreq->count += data->res.count;
+ else
+ dreq->error = task->tk_status;
- if (result <= 0) {
- if (tot_bytes > 0)
+ if (data->res.verf->committed != NFS_FILE_SYNC) {
+ switch (dreq->flags) {
+ case 0:
+ memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
break;
- return result;
+ case NFS_ODIRECT_DO_COMMIT:
+ if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+ dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
}
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
}
- return tot_bytes;
+ /* In case we have to resend */
+ data->args.stable = NFS_FILE_SYNC;
+
+ spin_unlock(&dreq->lock);
}
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
*/
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_release(void *calldata)
{
- ssize_t result = -EINVAL;
- struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- /*
- * No support for async yet
- */
- if (!is_sync_kiocb(iocb))
- return result;
-
- ctx = (struct nfs_open_context *)file->private_data;
- switch (rw) {
- case READ:
- dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_read(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- case WRITE:
- dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_write(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- default:
- break;
+ spin_lock(&dreq->lock);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+ spin_unlock(&dreq->lock);
+
+ nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+ .rpc_call_done = nfs_direct_write_result,
+ .rpc_release = nfs_direct_write_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
+ struct list_head *list = &dreq->list;
+ struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ unsigned int curpage, pgbase;
+
+ curpage = 0;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
+ do {
+ struct nfs_write_data *data;
+ size_t bytes;
+
+ bytes = wsize;
+ if (count < wsize)
+ bytes = count;
+
+ BUG_ON(list_empty(list));
+ data = list_entry(list->next, struct nfs_write_data, pages);
+ list_move_tail(&data->pages, &dreq->rewrite_list);
+
+ data->inode = inode;
+ data->cred = ctx->cred;
+ data->args.fh = NFS_FH(inode);
+ data->args.context = ctx;
+ data->args.offset = pos;
+ data->args.pgbase = pgbase;
+ data->args.pages = &pages[curpage];
+ data->args.count = bytes;
+ data->res.fattr = &data->fattr;
+ data->res.count = bytes;
+ data->res.verf = &data->verf;
+
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_write_direct_ops, data);
+ NFS_PROTO(inode)->write_setup(data, sync);
+
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long) inode;
+
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+
+ dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ bytes,
+ (unsigned long long)data->args.offset);
+
+ pos += bytes;
+ pgbase += bytes;
+ curpage += pgbase >> PAGE_SHIFT;
+ pgbase &= ~PAGE_MASK;
+
+ count -= bytes;
+ } while (count != 0);
+ BUG_ON(!list_empty(list));
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+{
+ ssize_t result;
+ sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_direct_req *dreq;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ int sync = 0;
+
+ dreq = nfs_direct_write_alloc(count, wsize);
+ if (!dreq)
+ return -ENOMEM;
+ if (dreq->commit_data == NULL || count < wsize)
+ sync = FLUSH_STABLE;
+
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
+ dreq->pages = pages;
+ dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
+ nfs_begin_data_update(inode);
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ nfs_direct_write_schedule(dreq, sync);
+ result = nfs_direct_wait(dreq);
+ rpc_clnt_sigunmask(clnt, &oldset);
+
return result;
}
@@ -640,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
* the request starts before the end of the file. For that check
* to work, we must generate a GETATTR before each direct read, and
* even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change. So our preference is simply
+ * READ where the file size could change. Our preference is simply
* to do all reads the application wants, and the server will take
* care of managing the end of file boundary.
- *
+ *
* This function also eliminates unnecessarily updating the file's
* atime locally, as the NFS server sets the file's atime, and this
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
{
ssize_t retval = -EINVAL;
- loff_t *ppos = &iocb->ki_pos;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = count,
- };
dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- if (!is_sync_kiocb(iocb))
- goto out;
if (count < 0)
goto out;
retval = -EFAULT;
- if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_WRITE, buf, count))
goto out;
retval = 0;
if (!count)
@@ -692,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
if (retval)
goto out;
- retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(READ, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+ pages, page_count);
if (retval > 0)
- *ppos = pos + retval;
+ iocb->ki_pos = pos + retval;
out:
return retval;
@@ -704,8 +810,8 @@ out:
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode
@@ -725,28 +831,19 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
{
ssize_t retval;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = (char __user *)buf,
- };
dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- retval = -EINVAL;
- if (!is_sync_kiocb(iocb))
- goto out;
-
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
@@ -757,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
retval = 0;
if (!count)
goto out;
- iov.iov_len = count,
retval = -EFAULT;
- if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_READ, buf, count))
goto out;
retval = nfs_sync_mapping(mapping);
if (retval)
goto out;
- retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+ pos, pages, page_count);
+
+ /*
+ * XXX: nfs_end_data_update() already ensures this file's
+ * cached data is subsequently invalidated. Do we really
+ * need to call invalidate_inode_pages2() again here?
+ *
+ * For aio writes, this invalidation will almost certainly
+ * occur before the writes complete. Kind of racey.
+ */
if (mapping->nrpages)
invalidate_inode_pages2(mapping);
+
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -777,6 +890,10 @@ out:
return retval;
}
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
int nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -790,6 +907,10 @@ int nfs_init_directcache(void)
return 0;
}
+/**
+ * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
void nfs_destroy_directcache(void)
{
if (kmem_cache_destroy(nfs_direct_cachep))
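The unstable-write handling introduced in fs/nfs/direct.c above is driven by
dreq->flags: the first unstable WRITE reply records the server's verifier and
arms a single COMMIT; a later verifier mismatch (or a failed COMMIT) forces
every write to be resent with FLUSH_STABLE. A standalone, simplified sketch of
that per-reply decision, with stand-in types rather than the kernel structures:

	#include <string.h>

	enum { ODIRECT_DONE, ODIRECT_DO_COMMIT, ODIRECT_RESCHED_WRITES };

	struct verf { unsigned char data[8]; };

	/* Called for each unstable WRITE reply: decide whether one COMMIT
	 * will do, or whether the whole request must be rewritten. */
	static int on_unstable_write_reply(int flags, struct verf *saved,
					   const struct verf *reply)
	{
		switch (flags) {
		case ODIRECT_DONE:			/* first unstable reply */
			memcpy(saved, reply, sizeof(*saved));
			return ODIRECT_DO_COMMIT;
		case ODIRECT_DO_COMMIT:
			if (memcmp(saved, reply, sizeof(*saved)))
				return ODIRECT_RESCHED_WRITES;	/* verifier changed */
			return ODIRECT_DO_COMMIT;
		default:
			return flags;			/* already rescheduling */
		}
	}

Once all writes have completed, nfs_direct_write_complete() acts on the final
state: DO_COMMIT issues the single COMMIT, RESCHED_WRITES replays the writes
synchronously, and anything else ends the request and wakes the waiter.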
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe9f53..5263b2864a4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
#include <asm/system.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_FILE
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
- struct nfs_server *server = NFS_SERVER(inode);
- int (*open)(struct inode *, struct file *);
int res;
res = nfs_check_flags(filp->f_flags);
if (res)
return res;
+ nfs_inc_stats(inode, NFSIOS_VFSOPEN);
lock_kernel();
- /* Do NFSv4 open() call */
- if ((open = server->rpc_ops->file_open) != NULL)
- res = open(inode, filp);
+ res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
unlock_kernel();
return res;
}
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
/* Ensure that dirty pages are flushed out with the right creds */
if (filp->f_mode & FMODE_WRITE)
filemap_fdatawrite(filp->f_mapping);
+ nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return NFS_PROTO(inode)->file_release(inode, filp);
}
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
+ nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
lock_kernel();
/* Ensure that data+attribute caches are up to date after close() */
status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
(unsigned long) count, (unsigned long) pos);
result = nfs_revalidate_file(inode, iocb->ki_filp);
+ nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
result = generic_file_aio_read(iocb, buf, count, pos);
return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+ nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
lock_kernel();
status = nfs_wb_all(inode);
if (!status) {
@@ -316,6 +318,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
return status;
}
+static int nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+ /* FIXME: we really should cancel any unstarted writes on this page */
+ return 1;
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+ return !nfs_wb_page(page->mapping->host, page);
+}
+
struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
@@ -324,6 +337,8 @@ struct address_space_operations nfs_file_aops = {
.writepages = nfs_writepages,
.prepare_write = nfs_prepare_write,
.commit_write = nfs_commit_write,
+ .invalidatepage = nfs_invalidate_page,
+ .releasepage = nfs_release_page,
#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
#endif
@@ -365,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
if (!count)
goto out;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
result = generic_file_aio_write(iocb, buf, count, pos);
out:
return result;
@@ -376,15 +392,17 @@ out_swapfile:
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
- struct file_lock *cfl;
+ struct file_lock cfl;
struct inode *inode = filp->f_mapping->host;
int status = 0;
lock_kernel();
/* Try local locking first */
- cfl = posix_test_lock(filp, fl);
- if (cfl != NULL) {
- locks_copy_lock(fl, cfl);
+ if (posix_test_lock(filp, fl, &cfl)) {
+ fl->fl_start = cfl.fl_start;
+ fl->fl_end = cfl.fl_end;
+ fl->fl_type = cfl.fl_type;
+ fl->fl_pid = cfl.fl_pid;
goto out;
}
@@ -425,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -446,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
else
status = do_vfs_lock(filp, fl);
unlock_kernel();
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -489,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
nfs_sync_mapping(filp->f_mapping);
nfs_zap_caches(inode);
out:
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
@@ -504,9 +516,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
-
- if (!inode)
- return -EINVAL;
+ nfs_inc_stats(inode, NFSIOS_VFSLOCK);
/* No mandatory locks over NFS */
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +541,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags);
- if (!inode)
- return -EINVAL;
-
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd30333..3fab5b0cfc5 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
*/
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
struct dentry *idmap_dentry;
wait_queue_head_t idmap_wq;
struct idmap_msg idmap_im;
- struct semaphore idmap_lock; /* Serializes upcalls */
- struct semaphore idmap_im_lock; /* Protects the hashtable */
+ struct mutex idmap_lock; /* Serializes upcalls */
+ struct mutex idmap_im_lock; /* Protects the hashtable */
struct idmap_hashtable idmap_user_hash;
struct idmap_hashtable idmap_group_hash;
};
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
if (clp->cl_idmap != NULL)
return;
- if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+ if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
return;
- memset(idmap, 0, sizeof(*idmap));
-
snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
"%s/idmap", clp->cl_rpcclient->cl_pathname);
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
return;
}
- init_MUTEX(&idmap->idmap_lock);
- init_MUTEX(&idmap->idmap_im_lock);
+ mutex_init(&idmap->idmap_lock);
+ mutex_init(&idmap->idmap_im_lock);
init_waitqueue_head(&idmap->idmap_wq);
idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
if (!idmap)
return;
+ dput(idmap->idmap_dentry);
+ idmap->idmap_dentry = NULL;
rpc_unlink(idmap->idmap_path);
clp->cl_idmap = NULL;
kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
if (namelen >= IDMAP_NAMESZ)
return -EINVAL;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_name(h, name, namelen);
if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
*id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return (ret);
}
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
im = &idmap->idmap_im;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_id(h, id);
if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return ret;
}
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (copy_from_user(&im_in, src, mlen) != 0)
return (-EFAULT);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
ret = mlen;
im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
ret = mlen;
out:
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
return ret;
}
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
if (msg->errno >= 0)
return;
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
im->im_status = IDMAP_STATUS_LOOKUPFAIL;
wake_up(&idmap->idmap_wq);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
}
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3413996f9a8..2f7656b911b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *);
static void nfs_umount_begin(struct super_block *);
static int nfs_statfs(struct super_block *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
+static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static void nfs_zap_acl_cache(struct inode *);
static struct rpc_program nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops = {
.clear_inode = nfs_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
static int
nfs_write_inode(struct inode *inode, int sync)
{
- int flags = sync ? FLUSH_WAIT : 0;
+ int flags = sync ? FLUSH_SYNC : 0;
int ret;
ret = nfs_commit_inode(inode, flags);
@@ -237,7 +241,6 @@ static struct inode *
nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
{
struct nfs_server *server = NFS_SB(sb);
- struct inode *rooti;
int error;
error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
return ERR_PTR(error);
}
- rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
- if (!rooti)
- return ERR_PTR(-ENOMEM);
- return rooti;
+ return nfs_fhget(sb, rootfh, fsinfo->fattr);
}
/*
@@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
sb->s_magic = NFS_SUPER_MAGIC;
+ server->io_stats = nfs_alloc_iostats();
+ if (server->io_stats == NULL)
+ return -ENOMEM;
+
root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
/* Did getting the root inode fail? */
if (IS_ERR(root_inode)) {
@@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
}
sb->s_root->d_op = server->rpc_ops->dentry_ops;
+ /* mount time stamp, in jiffies */
+ server->mount_time = jiffies;
+
/* Get some general file system info */
if (server->namelen == 0 &&
server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
/* create transport and client */
xprt = xprt_create_proto(proto, &server->addr, &timeparms);
if (IS_ERR(xprt)) {
@@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
}
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
{
static struct proc_nfs_info {
int flag;
@@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
{ NFS_MOUNT_INTR, ",intr", "" },
- { NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
- { NFS_MOUNT_NONLM, ",nolock", ",lock" },
+ { NFS_MOUNT_NONLM, ",nolock", "" },
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ 0, NULL, NULL }
};
struct proc_nfs_info *nfs_infop;
- struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
char buf[12];
char *proto;
- seq_printf(m, ",v%d", nfss->rpc_ops->version);
+ seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
seq_printf(m, ",rsize=%d", nfss->rsize);
seq_printf(m, ",wsize=%d", nfss->wsize);
- if (nfss->acregmin != 3*HZ)
+ if (nfss->acregmin != 3*HZ || showdefaults)
seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
- if (nfss->acregmax != 60*HZ)
+ if (nfss->acregmax != 60*HZ || showdefaults)
seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
- if (nfss->acdirmin != 30*HZ)
+ if (nfss->acdirmin != 30*HZ || showdefaults)
seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
- if (nfss->acdirmax != 60*HZ)
+ if (nfss->acdirmax != 60*HZ || showdefaults)
seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
if (nfss->flags & nfs_infop->flag)
@@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
proto = buf;
}
seq_printf(m, ",proto=%s", proto);
+ seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+ seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+ nfs_show_mount_options(m, nfss, 0);
+
seq_puts(m, ",addr=");
seq_escape(m, nfss->hostname, " \t\n\\");
+
+ return 0;
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{
+ int i, cpu;
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+ struct rpc_auth *auth = nfss->client->cl_auth;
+ struct nfs_iostats totals = { };
+
+ seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+ /*
+ * Display all mount option settings
+ */
+ seq_printf(m, "\n\topts:\t");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+ nfs_show_mount_options(m, nfss, 1);
+
+ seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
+ seq_printf(m, "\n\tcaps:\t");
+ seq_printf(m, "caps=0x%x", nfss->caps);
+ seq_printf(m, ",wtmult=%d", nfss->wtmult);
+ seq_printf(m, ",dtsize=%d", nfss->dtsize);
+ seq_printf(m, ",bsize=%d", nfss->bsize);
+ seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+ if (nfss->rpc_ops->version == 4) {
+ seq_printf(m, "\n\tnfsv4:\t");
+ seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+ }
+#endif
+
+ /*
+ * Display security flavor in effect for this mount
+ */
+ seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+ if (auth->au_flavor)
+ seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+ /*
+ * Display superblock I/O counters
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ struct nfs_iostats *stats;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ preempt_disable();
+ stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ totals.events[i] += stats->events[i];
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ totals.bytes[i] += stats->bytes[i];
+
+ preempt_enable();
+ }
+
+ seq_printf(m, "\n\tevents:\t");
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ seq_printf(m, "%lu ", totals.events[i]);
+ seq_printf(m, "\n\tbytes:\t");
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ seq_printf(m, "%Lu ", totals.bytes[i]);
+ seq_printf(m, "\n");
+
+ rpc_print_iostats(m, nfss->client);
+
return 0;
}
@@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
struct nfs_inode *nfsi = NFS_I(inode);
int mode = inode->i_mode;
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
@@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
.fh = fh,
.fattr = fattr
};
- struct inode *inode = NULL;
+ struct inode *inode = ERR_PTR(-ENOENT);
unsigned long hash;
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
hash = nfs_fattr_to_ino_t(fattr);
- if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+ inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
+ if (inode == NULL) {
+ inode = ERR_PTR(-ENOMEM);
goto out_no_inode;
+ }
if (inode->i_state & I_NEW) {
struct nfs_inode *nfsi = NFS_I(inode);
@@ -834,7 +935,7 @@ out:
return inode;
out_no_inode:
- printk("nfs_fhget: iget failed\n");
+ dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
goto out;
}
@@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
struct nfs_fattr fattr;
int error;
+ nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
if (attr->ia_valid & ATTR_SIZE) {
if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
lock_kernel();
nfs_begin_data_update(inode);
- /* Write all dirty data if we're changing file permissions or size */
- if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
- filemap_write_and_wait(inode->i_mapping);
- nfs_wb_all(inode);
- }
+ /* Write all dirty data */
+ filemap_write_and_wait(inode->i_mapping);
+ nfs_wb_all(inode);
/*
* Return any delegations if we're going to change ACLs
*/
@@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
+ nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
inode->i_size = attr->ia_size;
vmtruncate(inode, attr->ia_size);
}
@@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
int err;
/* Flush out writes to the server in order to update c/mtime */
- nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+ nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
/*
* We may force a getattr if the user cares about atime.
@@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
return err;
}
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
{
struct nfs_open_context *ctx;
@@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
if (ctx != NULL) {
atomic_set(&ctx->count, 1);
ctx->dentry = dget(dentry);
+ ctx->vfsmnt = mntget(mnt);
ctx->cred = get_rpccred(cred);
ctx->state = NULL;
ctx->lockowner = current->files;
@@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
dput(ctx->dentry);
+ mntput(ctx->vfsmnt);
kfree(ctx);
}
}
@@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
* Ensure that mmap has a recent RPC credential for use when writing out
* shared pages
*/
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
return ctx;
}
-void nfs_file_clear_open_context(struct file *filp)
+static void nfs_file_clear_open_context(struct file *filp)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp)
cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
if (IS_ERR(cred))
return PTR_ERR(cred);
- ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+ ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
put_rpccred(cred);
if (ctx == NULL)
return -ENOMEM;
@@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode)
*/
int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
+ nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
&& !nfs_attribute_timeout(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
struct nfs_inode *nfsi = NFS_I(inode);
if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
if (S_ISREG(inode->i_mode))
nfs_sync_mapping(mapping);
invalidate_inode_pages2(mapping);
@@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
+ /* Has the inode gone and changed behind our back? */
+ if (nfsi->fileid != fattr->fileid
+ || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ return -EIO;
+ }
+
/* Are we in the process of updating data on the server? */
data_unstable = nfs_caches_unstable(inode);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
- if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
- nfsi->change_attr != fattr->change_attr) {
+ if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+ if (nfsi->change_attr == fattr->change_attr)
+ goto out;
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- /* Has the inode gone and changed behind our back? */
- if (nfsi->fileid != fattr->fileid
- || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
- return -EIO;
- }
-
- cur_size = i_size_read(inode);
- new_isize = nfs_size_to_loff_t(fattr->size);
-
/* Verify a few of the more important attributes */
if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- if (cur_size != new_isize) {
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
- if (nfsi->npages == 0)
- nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
- }
+
+ cur_size = i_size_read(inode);
+ new_isize = nfs_size_to_loff_t(fattr->size);
+ if (cur_size != new_isize && nfsi->npages == 0)
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Have any file permissions changed? */
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (inode->i_nlink != fattr->nlink)
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+out:
if (!timespec_equal(&inode->i_atime, &fattr->atime))
nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
@@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_change_attribute = jiffies;
}
- if ((fattr->valid & NFS_ATTR_FATTR_V4)
- && nfsi->change_attr != fattr->change_attr) {
- dprintk("NFS: change_attr change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- nfsi->change_attr = fattr->change_attr;
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = jiffies;
- }
-
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_blksize = fattr->du.nfs2.blocksize;
}
+ if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+ if (nfsi->change_attr != fattr->change_attr) {
+ dprintk("NFS: change_attr change on server for file %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ nfsi->change_attr = fattr->change_attr;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfsi->cache_change_attribute = jiffies;
+ } else
+ invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+ }
+
/* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) {
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
#endif /* CONFIG_NFS_V3 */
s = ERR_PTR(-ENOMEM);
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
goto out_err;
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s)
rpciod_down(); /* release rpciod */
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = {
.clear_inode = nfs4_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
clp = nfs4_get_client(&server->addr.sin_addr);
if (!clp) {
dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL);
}
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
return ERR_PTR(-ENOMEM);
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb)
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
- rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
+ rpciod_down();
+
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 00000000000..6350ecbde58
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,164 @@
+/*
+ * linux/fs/nfs/iostat.h
+ *
+ * Declarations for NFS client per-mount statistics
+ *
+ * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ * NFS client per-mount statistics provide information about the health of
+ * the NFS client and the health of each NFS mount point. Generally these
+ * are not for detailed problem diagnosis, but simply to indicate that there
+ * is a problem.
+ *
+ * These counters are not meant to be human-readable, but are meant to be
+ * integrated into system monitoring tools such as "sar" and "iostat". As
+ * such, the counters are sampled by the tools over time, and are never
+ * zeroed after a file system is mounted. Moving averages can be computed
+ * by the tools by taking the difference between two instantaneous samples
+ * and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS "1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1. SERVER - the number of payload bytes read from or written to the
+ * server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2. NORMAL - the number of bytes read or written by applications via
+ * the read(2) and write(2) system call interfaces.
+ *
+ * 3. DIRECT - the number of bytes read or written from files opened
+ * with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client. Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc.).
+ *
+ * These counters can also help characterize which access methods are in
+ * use. DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface. A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+ NFSIOS_NORMALREADBYTES = 0,
+ NFSIOS_NORMALWRITTENBYTES,
+ NFSIOS_DIRECTREADBYTES,
+ NFSIOS_DIRECTWRITTENBYTES,
+ NFSIOS_SERVERREADBYTES,
+ NFSIOS_SERVERWRITTENBYTES,
+ NFSIOS_READPAGES,
+ NFSIOS_WRITEPAGES,
+ __NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging. The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches. This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anomalous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+ NFSIOS_INODEREVALIDATE = 0,
+ NFSIOS_DENTRYREVALIDATE,
+ NFSIOS_DATAINVALIDATE,
+ NFSIOS_ATTRINVALIDATE,
+ NFSIOS_VFSOPEN,
+ NFSIOS_VFSLOOKUP,
+ NFSIOS_VFSACCESS,
+ NFSIOS_VFSUPDATEPAGE,
+ NFSIOS_VFSREADPAGE,
+ NFSIOS_VFSREADPAGES,
+ NFSIOS_VFSWRITEPAGE,
+ NFSIOS_VFSWRITEPAGES,
+ NFSIOS_VFSGETDENTS,
+ NFSIOS_VFSSETATTR,
+ NFSIOS_VFSFLUSH,
+ NFSIOS_VFSFSYNC,
+ NFSIOS_VFSLOCK,
+ NFSIOS_VFSRELEASE,
+ NFSIOS_CONGESTIONWAIT,
+ NFSIOS_SETATTRTRUNC,
+ NFSIOS_EXTENDWRITE,
+ NFSIOS_SILLYRENAME,
+ NFSIOS_SHORTREAD,
+ NFSIOS_SHORTWRITE,
+ NFSIOS_DELAY,
+ __NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+ unsigned long long bytes[__NFSIOS_BYTESMAX];
+ unsigned long events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->events[stat]++;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+ nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->bytes[stat] += addend;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+ return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+ if (stats != NULL)
+ free_percpu(stats);
+}
+
+#endif
+#endif
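The header comment in iostat.h above states that these counters are never zeroed and that monitoring tools should derive rates by taking the difference between two instantaneous samples and dividing by the time between them. As a purely illustrative sketch (not part of the patch), the small userspace program below performs that computation for the eight byte counters; the names example_byte_rates and EX_BYTESMAX, the sample values, and the 10-second interval are all hypothetical.

#include <stdio.h>

#define EX_BYTESMAX 8	/* one slot per counter in enum nfs_stat_bytecounters above */

/* rate = (later sample - earlier sample) / elapsed seconds */
static void example_byte_rates(const unsigned long long before[EX_BYTESMAX],
			       const unsigned long long after[EX_BYTESMAX],
			       double seconds)
{
	int i;

	for (i = 0; i < EX_BYTESMAX; i++)
		printf("byte counter %d: %.1f bytes/sec\n", i,
		       (double)(after[i] - before[i]) / seconds);
}

int main(void)
{
	/* two hypothetical snapshots of the per-mount byte counters, 10s apart */
	unsigned long long t0[EX_BYTESMAX] = { 4096, 8192, 0, 0, 4096, 8192, 1, 2 };
	unsigned long long t1[EX_BYTESMAX] = { 45056, 16384, 0, 0, 40960, 16384, 11, 4 };

	example_byte_rates(t0, t1, 10.0);
	return 0;
}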
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0b9a78353d6..445abb4d421 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
struct mnt_fhstatus result = {
.fh = fh
};
+ struct rpc_message msg = {
+ .rpc_argp = path,
+ .rpc_resp = &result,
+ };
char hostname[32];
int status;
- int call;
dprintk("NFS: nfs_mount(%08x:%s)\n",
(unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
if (IS_ERR(mnt_clnt))
return PTR_ERR(mnt_clnt);
- call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
- status = rpc_call(mnt_clnt, call, path, &result, 0);
+ if (version == NFS_MNT3_VERSION)
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+ else
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+ status = rpc_call_sync(mnt_clnt, &msg, 0);
return status < 0? status : (result.status? -EACCES : 0);
}
@@ -137,6 +144,8 @@ static struct rpc_procinfo mnt_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MNTPROC_MNT,
+ .p_name = "MOUNT",
},
};
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MOUNTPROC3_MNT,
+ .p_name = "MOUNT",
},
};
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 6548a65de94..f0015fa876e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs_xdr_##restype, \
.p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFSPROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs_procedures[] = {
PROC(GETATTR, fhandle, attrstat, 1),
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0ae94..33287879bd2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
struct nfs3_getaclres res = {
.fattr = &fattr,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
struct posix_acl *acl;
int status, count;
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
return NULL;
dprintk("NFS call getacl\n");
- status = rpc_call(server->client_acl, ACLPROC3_GETACL,
- &args, &res, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
dprintk("NFS reply getacl: %d\n", status);
/* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.acl_access = acl,
.pages = pages,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status, count;
status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk("NFS call setacl\n");
nfs_begin_data_update(inode);
- status = rpc_call(server->client_acl, ACLPROC3_SETACL,
- &args, &fattr, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
spin_lock(&inode->i_lock);
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567f055..cf186f0d2b3 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
#include <linux/smp_lock.h>
#include <linux/nfs_mount.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PROC
extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
return res;
}
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
- struct rpc_message msg = {
- .rpc_proc = &clnt->cl_procinfo[proc],
- .rpc_argp = argp,
- .rpc_resp = resp,
- };
- return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
- nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
- nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags)
static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
{
if (task->tk_status != -EJUKEBOX)
return 0;
+ nfs_inc_stats(inode, NFSIOS_DELAY);
task->tk_status = 0;
rpc_restart_call(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("%s: call fsinfo\n", __FUNCTION__);
nfs_fattr_init(info->fattr);
- status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
- status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_resp = info->fattr;
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
}
return status;
@@ -107,12 +103,16 @@ static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr,
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
- if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_argp = fhandle;
+ msg.rpc_resp = fattr;
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ }
dprintk("NFS reply lookup: %d\n", status);
if (status >= 0)
status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = entry->cred
+ .rpc_cred = entry->cred,
};
int mode = entry->mask;
int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status;
dprintk("NFS call readlink\n");
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
- &args, &fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_refresh_inode(inode, &fattr);
dprintk("NFS reply readlink: %d\n", status);
return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
again:
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
- nfs_post_op_update_inode(dir, &dir_attr);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ nfs_refresh_inode(dir, &dir_attr);
/* If the server doesn't support the exclusive creation semantics,
* try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
struct rpc_message *msg = &task->tk_msg;
struct nfs_fattr *dir_attr;
- if (nfs3_async_handle_jukebox(task))
+ if (nfs3_async_handle_jukebox(task, dir->d_inode))
return 1;
if (msg->rpc_argp) {
dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
.fromattr = &old_dir_attr,
.toattr = &new_dir_attr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
nfs_fattr_init(&old_dir_attr);
nfs_fattr_init(&new_dir_attr);
- status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_post_op_update_inode(old_dir, &old_dir_attr);
nfs_post_op_update_inode(new_dir, &new_dir_attr);
dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.dir_attr = &dir_attr,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
nfs_post_op_update_inode(inode, &fattr);
dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int mode = sattr->ia_mode;
int status;
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &dir_attr,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
nfs_fattr_init(&dir_attr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fh,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -707,11 +759,16 @@ static int
nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT],
+ .rpc_argp = fhandle,
+ .rpc_resp = stat,
+ };
int status;
dprintk("NFS call fsstat\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
return status;
}
@@ -720,11 +777,16 @@ static int
nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
return status;
}
@@ -733,40 +795,34 @@ static int
nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call pathconf\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply pathconf: %d\n", status);
return status;
}
extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
/* Call back common NFS readpage processing */
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, &data->fattr);
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_read_ops = {
- .rpc_call_done = nfs3_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READ],
.rpc_argp = &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_write_ops = {
- .rpc_call_done = nfs3_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int stable;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
.rpc_argp = &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
+ data->args.stable = NFS_UNSTABLE;
if (how & FLUSH_STABLE) {
- if (!NFS_I(inode)->ncommit)
- stable = NFS_FILE_SYNC;
- else
- stable = NFS_DATA_SYNC;
- } else
- stable = NFS_UNSTABLE;
- data->args.stable = stable;
-
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ data->args.stable = NFS_FILE_SYNC;
+ if (NFS_I(data->inode)->ncommit)
+ data->args.stable = NFS_DATA_SYNC;
+ }
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_commit_ops = {
- .rpc_call_done = nfs3_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
.rpc_argp = &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = {
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.read_setup = nfs3_proc_read_setup,
+ .read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
+ .write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
+ .commit_done = nfs3_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs3_proc_lock,
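The nfs3proc.c hunks above replace the removed rpc_call() convenience wrapper with an explicit struct rpc_message plus rpc_call_sync() at every synchronous call site. Condensed into one place, a sketch of that shape (illustration only, not part of the patch; the function name example_getattr is hypothetical, and the body mirrors the converted nfs3_proc_getattr above):

static int example_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
			   struct nfs_fattr *fattr)
{
	struct rpc_message msg = {
		.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
		.rpc_argp = fhandle,
		.rpc_resp = fattr,
	};

	/* previously: rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0) */
	return rpc_call_sync(server->client, &msg, 0);
}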
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 5224a191efb..ec233619687 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
.p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
.p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFS3PROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
.p_timer = 1,
+ .p_name = "GETACL",
},
[ACLPROC3_SETACL] = {
.p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
.p_timer = 0,
+ .p_name = "SETACL",
},
};
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f8c0066e02e..47ece1dd3c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
if (!(data->f_attr.valid & NFS_ATTR_FATTR))
goto out;
inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
- if (inode == NULL)
+ if (IS_ERR(inode))
goto out;
state = nfs4_get_open_state(inode, data->owner);
if (state == NULL)
@@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -908,7 +915,7 @@ out_put_state_owner:
static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
- struct nfs4_state *res;
+ struct nfs4_state *res = ERR_PTR(-EIO);
int err;
do {
@@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
return res;
}
-static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_setattrargs arg = {
- .fh = fhandle,
+ .fh = NFS_FH(inode),
.iap = sattr,
.server = server,
.bitmask = server->attr_bitmask,
@@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
- if (state != NULL) {
+ if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ /* Use that stateid */
+ } else if (state != NULL) {
msg.rpc_cred = state->owner->so_cred;
nfs4_copy_stateid(&arg.stateid, state, current->files);
} else
@@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
return status;
}
-static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(server,
- _nfs4_do_setattr(server, fattr, fhandle, sattr,
- state),
+ _nfs4_do_setattr(inode, fattr, sattr, state),
&exception);
} while (exception.retry);
return err;
@@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
if (ctx != NULL)
state = ctx->state;
- status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
- NFS_FH(inode), sattr, state);
+ status = nfs4_do_setattr(inode, fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
if (ctx != NULL)
@@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
d_instantiate(dentry, igrab(state->inode));
if (flags & O_EXCL) {
struct nfs_fattr fattr;
- status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
- NFS_FH(state->inode), sattr, state);
+ status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(state->inode, sattr);
}
@@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
- struct inode *inode = data->inode;
+ struct nfs_server *server = NFS_SERVER(data->inode);
- if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, server) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status > 0)
- renew_lease(NFS_SERVER(inode), data->timestamp);
- /* Call back common NFS readpage processing */
- nfs_readpage_result(task, calldata);
+ renew_lease(server, data->timestamp);
+ return 0;
}
-static const struct rpc_call_ops nfs4_read_ops = {
- .rpc_call_done = nfs4_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- int flags;
data->timestamp = jiffies;
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp);
nfs_post_op_update_inode(inode, data->res.fattr);
}
- /* Call back common NFS writeback processing */
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_write_ops = {
- .rpc_call_done = nfs4_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
.rpc_argp = &data->args,
@@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
struct inode *inode = data->inode;
struct nfs_server *server = NFS_SERVER(inode);
int stable;
- int flags;
if (how & FLUSH_STABLE) {
if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
data->timestamp = jiffies;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0)
nfs_post_op_update_inode(inode, data->res.fattr);
- /* Call back common NFS writeback processing */
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_commit_ops = {
- .rpc_call_done = nfs4_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- struct nfs_server *server = NFS_SERVER(inode);
- int flags;
+ struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
/*
@@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
rpc_wake_up_task(task);
task->tk_status = 0;
return -EAGAIN;
- case -NFS4ERR_GRACE:
case -NFS4ERR_DELAY:
+ nfs_inc_server_stats((struct nfs_server *) server,
+ NFSIOS_DELAY);
+ case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
task->tk_status = 0;
return -EAGAIN;
@@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
return status;
}
-int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
{
struct nfs_fsinfo fsinfo;
struct rpc_message msg = {
@@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
return status;
}
+int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+ long timeout;
+ int err;
+ do {
+ err = _nfs4_proc_setclientid_confirm(clp, cred);
+ switch (err) {
+ case 0:
+ return err;
+ case -NFS4ERR_RESOURCE:
+ /* The IBM lawyers misread another document! */
+ case -NFS4ERR_DELAY:
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ }
+ } while (err == 0);
+ return err;
+}
+
struct nfs4_delegreturndata {
struct nfs4_delegreturnargs args;
struct nfs4_delegreturnres res;
@@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata)
kfree(calldata);
}
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_call_prepare = nfs4_delegreturn_prepare,
.rpc_call_done = nfs4_delegreturn_done,
.rpc_release = nfs4_delegreturn_release,
@@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data->rpc_status = 0;
task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
- if (IS_ERR(task)) {
- nfs4_delegreturn_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0) {
status = data->rpc_status;
@@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
struct nfs_seqid *seqid)
{
struct nfs4_unlockdata *data;
- struct rpc_task *task;
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
if (data == NULL) {
@@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
/* Unlock _before_ we do the RPC call */
do_vfs_lock(fl->fl_file, fl);
- task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
- if (IS_ERR(task))
- nfs4_locku_release_calldata(data);
- return task;
+ return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
}
static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.reclaim = 1;
task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
&nfs4_lock_ops, data);
- if (IS_ERR(task)) {
- nfs4_lock_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
ret = nfs4_wait_for_completion_rpc_task(task);
if (ret == 0) {
ret = data->rpc_status;
@@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
{
size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+ if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+ return 0;
if (buf && buflen < len)
return -ERANGE;
if (buf)
@@ -3644,8 +3617,11 @@ struct nfs_rpc_ops nfs_v4_clientops = {
.pathconf = nfs4_proc_pathconf,
.decode_dirent = nfs4_decode_dirent,
.read_setup = nfs4_proc_read_setup,
+ .read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
+ .write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
+ .commit_done = nfs4_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs4_proc_lock,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afad0255e7d..96e5b82c153 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -977,6 +977,7 @@ out:
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
NIPQUAD(clp->cl_addr.s_addr), -status);
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0a1bd36a483..7c5d70efe72 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ .p_statidx = NFSPROC4_CLNT_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs4_procedures[] = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b148e..106aca388eb 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
+ BUG_ON(PagePrivate(page));
+ BUG_ON(!PageLocked(page));
+ BUG_ON(page->mapping->host != inode);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
- spin_lock(&nfsi->req_lock);
- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
- spin_unlock(&nfsi->req_lock);
+ if (req->wb_page != NULL) {
+ spin_lock(&nfsi->req_lock);
+ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+ spin_unlock(&nfsi->req_lock);
+ }
nfs_unlock_request(req);
}
@@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req)
*/
void nfs_clear_request(struct nfs_page *req)
{
- if (req->wb_page) {
- page_cache_release(req->wb_page);
+ struct page *page = req->wb_page;
+ if (page != NULL) {
+ page_cache_release(page);
req->wb_page = NULL;
}
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d71c03..9dd85cac2df 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
{
struct nfs_fattr *fattr = info->fattr;
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("%s: call getattr\n", __FUNCTION__);
nfs_fattr_init(fattr);
- status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
- status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+ msg.rpc_resp = &fsinfo;
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
@@ -90,12 +97,16 @@ static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFSPROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
/* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_READLINK],
+ .rpc_argp = &args,
+ };
int status;
dprintk("NFS call readlink\n");
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
dprintk("NFS reply readlink: %d\n", status);
return status;
}
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
nfs_fattr_init(&fattr);
dprintk("NFS call create %s\n", dentry->d_name.name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status, mode;
dprintk("NFS call mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
}
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == -EINVAL && S_ISFIFO(mode)) {
sattr->ia_mode = mode;
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = NULL
};
int status;
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
.toname = new_name->name,
.tolen = new_name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
- status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_mark_for_revalidate(old_dir);
nfs_mark_for_revalidate(new_dir);
dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.toname = name->name,
.tolen = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LINK],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_mark_for_revalidate(inode);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.tolen = path->len,
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
+ .rpc_argp = &arg,
+ };
int status;
if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(fattr);
fhandle->size = 0;
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call mkdir %s\n", dentry->d_name.name);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RMDIR],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
.fh = NFS_FH(dir),
.cookie = cookie,
.count = count,
- .pages = &page
+ .pages = &page,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = cred
+ .rpc_cred = cred,
};
int status;
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call statfs\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
if (status)
goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
if (status)
goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
if (task->tk_status >= 0) {
nfs_refresh_inode(data->inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
if (data->args.offset + data->args.count >= data->res.fattr->size)
data->res.eof = 1;
}
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_read_ops = {
- .rpc_call_done = nfs_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READ],
.rpc_argp = &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_write_ops = {
- .rpc_call_done = nfs_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_WRITE],
.rpc_argp = &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops nfs_v2_clientops = {
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs_decode_dirent,
.read_setup = nfs_proc_read_setup,
+ .read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
+ .write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.file_open = nfs_open,
.file_release = nfs_release,
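
The fs/nfs/proc.c hunks above all apply the same mechanical conversion: the old rpc_call(clnt, procnum, argp, resp, flags) helper is replaced by an explicit struct rpc_message handed to rpc_call_sync(), so the RPC layer receives the rpc_procinfo entry directly. As an illustrative sketch only (NFSPROC_EXAMPLE and the surrounding variables are placeholders, not from the patch), the shape of the conversion is:

	/* Not part of the patch: before/after shape of the rpc_call_sync() conversion. */

	/* Old style: procedure number plus loose argument/result pointers. */
	status = rpc_call(clnt, NFSPROC_EXAMPLE, &arg, &res, 0);

	/* New style: the same call expressed through an rpc_message, which
	 * names the rpc_procinfo entry (the table that elsewhere in this
	 * patch gains p_statidx/p_name). */
	struct rpc_message msg = {
		.rpc_proc = &nfs_procedures[NFSPROC_EXAMPLE],
		.rpc_argp = &arg,
		.rpc_resp = &res,
	};
	status = rpc_call_sync(clnt, &msg, 0);
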
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43fadf8..3961524fd4a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,17 +31,49 @@
#include <asm/system.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+ struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kmalloc(size, GFP_NOFS);
+ if (p->pagevec) {
+ memset(p->pagevec, 0, size);
+ } else {
+ mempool_free(p, nfs_rdata_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_rdata_mempool);
+}
+
void nfs_readdata_release(void *data)
{
nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
}
count -= result;
rdata->args.pgbase += result;
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
/* Note: result == 0 should only happen if we're caching
* a write that extends the file and punches a hole.
*/
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req)
* Set up the NFS read request struct
*/
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset)
{
struct inode *inode;
+ int flags;
data->req = req;
data->inode = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_readpage_result_partial;
if (nbytes > rsize) {
- nfs_read_rpcsetup(req, data, rsize, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ rsize, offset);
offset += rsize;
nbytes -= rsize;
} else {
- nfs_read_rpcsetup(req, data, nbytes, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ nbytes, offset);
nbytes = 0;
}
nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_readpage_result_full;
- nfs_read_rpcsetup(req, data, count, 0);
+ nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
nfs_execute_read(data);
return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
/*
* Handle a read reply that fills part of a page.
*/
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
- if (status >= 0) {
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+ if (task->tk_status >= 0) {
unsigned int request = data->args.count;
unsigned int result = data->res.count;
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_partial_ops = {
+ .rpc_call_done = nfs_readpage_result_partial,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
unsigned int count = data->res.count;
+ if (nfs_readpage_result(task, data) != 0)
+ return;
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
struct page *page = req->wb_page;
nfs_list_remove_request(req);
- if (status >= 0) {
+ if (task->tk_status >= 0) {
if (count < PAGE_CACHE_SIZE) {
if (count < req->wb_bytes)
memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_full_ops = {
+ .rpc_call_done = nfs_readpage_result_full,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;
- int status = task->tk_status;
+ int status;
dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
- task->tk_pid, status);
+ task->tk_pid, task->tk_status);
+
+ status = NFS_PROTO(data->inode)->read_done(task, data);
+ if (status != 0)
+ return status;
+
+ nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
/* Is this a short read? */
if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
task->tk_status = -EIO;
}
spin_lock(&data->inode->i_lock);
NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
spin_unlock(&data->inode->i_lock);
- data->complete(data, status);
+ return 0;
}
/*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page)
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_CACHE_SIZE, page->index);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+ nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
/*
* Try to flush any pending writes to the file..
*
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
nr_pages);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (!list_empty(&head)) {
int err = nfs_pagein_list(&head, server->rpages);
if (!ret)
+ nfs_add_stats(inode, NFSIOS_READPAGES, err);
ret = err;
}
put_nfs_open_context(desc.ctx);
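
Together with the nfs4proc.c and proc.c changes earlier in the patch, this gives the read path a two-level completion scheme: nfs_readpage_result() is the shared helper invoked from the partial/full rpc_call_ops, and it defers to NFS_PROTO(inode)->read_done(), which returns 0 to let generic processing continue or a negative value (typically -EAGAIN after calling rpc_restart_call() itself) to stop it. A rough sketch of a hook following that contract, for illustration only and not taken from the patch:

	/* Illustrative only: the ->read_done() contract introduced above.
	 * The retry condition is a placeholder; the real v2/v3/v4 handlers
	 * decide this from task->tk_status and per-server state. */
	static int nfs_example_read_done(struct rpc_task *task, struct nfs_read_data *data)
	{
		if (task->tk_status == -EJUKEBOX) {	/* placeholder condition */
			rpc_restart_call(task);
			return -EAGAIN;		/* callers stop processing 'data' */
		}
		return 0;			/* nfs_readpage_result() finishes the request */
	}
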
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b53d55..0e28189c215 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
int status = -ENOMEM;
- data = kmalloc(sizeof(*data), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
- memset(data, 0, sizeof(*data));
data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b683550..3f5225404c9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
#include <linux/smp_lock.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
struct inode *,
struct page *,
unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
@@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
p->pagevec = &p->page_array[0];
else {
size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kzalloc(size, GFP_NOFS);
+ if (!p->pagevec) {
+ mempool_free(p, nfs_commit_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_commit_free(struct nfs_write_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_commit_mempool);
+}
+
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+ struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
p->pagevec = kmalloc(size, GFP_NOFS);
if (p->pagevec) {
memset(p->pagevec, 0, size);
} else {
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
p = NULL;
}
}
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
return p;
}
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
{
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
}
void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
if (i_size >= end)
return;
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
i_size_write(inode, end);
}
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
wdata->args.pgbase += result;
written += result;
count -= result;
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
} while (count);
/* Update file length */
nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
int priority = wb_priority(wbc);
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
/*
* Note: We need to ensure that we have a reference to the inode
* if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct inode *inode = mapping->host;
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
err = generic_writepages(mapping, wbc);
if (err)
return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
if (err < 0)
goto out;
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
wbc->nr_to_write -= err;
if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
err = nfs_wait_on_requests(inode, 0, 0);
@@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
+ SetPagePrivate(req->wb_page);
nfsi->npages++;
atomic_inc(&req->wb_count);
return 0;
@@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
BUG_ON (!NFS_WBACK_BUSY(req));
spin_lock(&nfsi->req_lock);
+ ClearPagePrivate(req->wb_page);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
nfsi->npages--;
if (!nfsi->npages) {
@@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
*
* Interruptible by signals only if mounted with intr flag.
*/
-static int
-nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
@@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
else
idx_end = idx_start + npages - 1;
- spin_lock(&nfsi->req_lock);
next = idx_start;
while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
if (req->wb_index > idx_end)
@@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
+ spin_lock(&nfsi->req_lock);
if (error < 0)
return error;
- spin_lock(&nfsi->req_lock);
res++;
}
- spin_unlock(&nfsi->req_lock);
return res;
}
+static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int ret;
+
+ spin_lock(&nfsi->req_lock);
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
+}
+
/*
* nfs_scan_dirty - Scan an inode for dirty requests
* @inode: NFS inode to scan
@@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
}
return res;
}
+#else
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+ return 0;
+}
#endif
static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
if (!bdi_write_congested(bdi))
return 0;
+
+ nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
if (intr) {
struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
sigset_t oldset;
@@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
- if (error < 0)
+ if (error < 0) {
+ if (new)
+ nfs_release_request(new);
return ERR_PTR(error);
+ }
continue;
}
spin_unlock(&nfsi->req_lock);
@@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page,
struct nfs_page *req;
int status = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name, count,
@@ -857,10 +918,12 @@ static inline int flush_task_priority(int how)
*/
static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
int how)
{
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->write_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
@@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_writeback_done_partial;
if (nbytes > wsize) {
- nfs_write_rpcsetup(req, data, wsize, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ wsize, offset, how);
offset += wsize;
nbytes -= wsize;
} else {
- nfs_write_rpcsetup(req, data, nbytes, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ nbytes, offset, how);
nbytes = 0;
}
nfs_execute_write(data);
@@ -978,16 +1045,13 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
unsigned int count;
- if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(head, inode, how);
-
data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_writeback_done_full;
/* Set up the argument struct */
- nfs_write_rpcsetup(req, data, count, 0, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
nfs_execute_write(data);
return 0;
@@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
return -ENOMEM;
}
-static int
-nfs_flush_list(struct list_head *head, int wpages, int how)
+static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
{
LIST_HEAD(one_request);
- struct nfs_page *req;
- int error = 0;
- unsigned int pages = 0;
+ int (*flush_one)(struct inode *, struct list_head *, int);
+ struct nfs_page *req;
+ int wpages = NFS_SERVER(inode)->wpages;
+ int wsize = NFS_SERVER(inode)->wsize;
+ int error;
- while (!list_empty(head)) {
- pages += nfs_coalesce_requests(head, &one_request, wpages);
+ flush_one = nfs_flush_one;
+ if (wsize < PAGE_CACHE_SIZE)
+ flush_one = nfs_flush_multi;
+ /* For single writes, FLUSH_STABLE is more efficient */
+ if (npages <= wpages && npages == NFS_I(inode)->npages
+ && nfs_list_entry(head->next)->wb_bytes <= wsize)
+ how |= FLUSH_STABLE;
+
+ do {
+ nfs_coalesce_requests(head, &one_request, wpages);
req = nfs_list_entry(one_request.next);
- error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+ error = flush_one(inode, &one_request, how);
if (error < 0)
- break;
- }
- if (error >= 0)
- return pages;
-
+ goto out_err;
+ } while (!list_empty(head));
+ return 0;
+out_err:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
@@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
/*
* Handle a write reply that flushed part of a page.
*/
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
@@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
- dprintk(", error = %d\n", status);
+ req->wb_context->error = task->tk_status;
+ dprintk(", error = %d\n", task->tk_status);
} else {
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
nfs_writepage_release(req);
}
+static const struct rpc_call_ops nfs_write_partial_ops = {
+ .rpc_call_done = nfs_writeback_done_partial,
+ .rpc_release = nfs_writedata_release,
+};
+
/*
* Handle a write reply that flushes a whole page.
*
@@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
*/
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req;
struct page *page;
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
+ req->wb_context->error = task->tk_status;
end_page_writeback(page);
nfs_inode_remove_request(req);
- dprintk(", error = %d\n", status);
+ dprintk(", error = %d\n", task->tk_status);
goto next;
}
end_page_writeback(page);
@@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_write_full_ops = {
+ .rpc_call_done = nfs_writeback_done_full,
+ .rpc_release = nfs_writedata_release,
+};
+
+
/*
* This function is called when the WRITE call is complete.
*/
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
+ int status;
dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ status = NFS_PROTO(data->inode)->write_done(task, data);
+ if (status != 0)
+ return status;
+ nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
@@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
if (task->tk_status >= 0 && resp->count < argp->count) {
static unsigned long complain;
+ nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
argp->stable = NFS_FILE_SYNC;
}
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
@@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
-
- /*
- * Process the nfs_page list
- */
- data->complete(data, task->tk_status);
+ return 0;
}
@@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata)
* Set up the argument/result storage required for the RPC call.
*/
static void nfs_commit_rpcsetup(struct list_head *head,
- struct nfs_write_data *data, int how)
+ struct nfs_write_data *data,
+ int how)
{
struct nfs_page *first;
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
-
+
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
NFS_PROTO(inode)->commit_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
/*
* COMMIT call returned
*/
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
struct nfs_page *req;
@@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
dprintk("NFS: %4d nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
@@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
}
sub_page_state(nr_unstable,res);
}
+
+static const struct rpc_call_ops nfs_commit_ops = {
+ .rpc_call_done = nfs_commit_done,
+ .rpc_release = nfs_commit_release,
+};
+#else
+static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+{
+ return 0;
+}
#endif
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_dirty(inode, &head, idx_start, npages);
spin_unlock(&nfsi->req_lock);
if (res) {
- struct nfs_server *server = NFS_SERVER(inode);
-
- /* For single writes, FLUSH_STABLE is more efficient */
- if (res == nfsi->npages && nfsi->npages <= server->wpages) {
- if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
- how |= FLUSH_STABLE;
- }
- error = nfs_flush_list(&head, server->wpages, how);
+ int error = nfs_flush_list(inode, &head, res, how);
+ if (error < 0)
+ return error;
}
- if (error < 0)
- return error;
return res;
}
@@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how)
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&nfsi->req_lock);
if (res) {
- error = nfs_commit_list(inode, &head, how);
+ int error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
}
@@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
}
#endif
-int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
- unsigned int npages, int how)
+int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how)
{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ LIST_HEAD(head);
int nocommit = how & FLUSH_NOCOMMIT;
- int wait = how & FLUSH_WAIT;
- int error;
-
- how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
+ int pages, ret;
+ how &= ~FLUSH_NOCOMMIT;
+ spin_lock(&nfsi->req_lock);
do {
- if (wait) {
- error = nfs_wait_on_requests(inode, idx_start, npages);
- if (error != 0)
- continue;
- }
- error = nfs_flush_inode(inode, idx_start, npages, how);
- if (error != 0)
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ if (ret != 0)
continue;
- if (!nocommit)
- error = nfs_commit_inode(inode, how);
- } while (error > 0);
- return error;
+ pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+ if (pages != 0) {
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_flush_list(inode, &head, pages, how);
+ spin_lock(&nfsi->req_lock);
+ continue;
+ }
+ if (nocommit)
+ break;
+ pages = nfs_scan_commit(inode, &head, 0, 0);
+ if (pages == 0)
+ break;
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_commit_list(inode, &head, how);
+ spin_lock(&nfsi->req_lock);
+ } while (ret >= 0);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
}
int nfs_init_writepagecache(void)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8d3d23c8a4d..c872bd07fc1 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -326,6 +326,8 @@ out:
.p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ .p_statidx = NFSPROC4_CB_##call, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nfs4_cb_procedures[] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1143cfb6454..f6ab762bea9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
struct nfs4_stateid *lock_stp;
struct file *filp;
struct file_lock file_lock;
- struct file_lock *conflock;
+ struct file_lock conflock;
int status = 0;
unsigned int strhashval;
@@ -2775,11 +2775,11 @@ conflicting_lock:
/* XXX There is a race here. Future patch needed to provide
* an atomic posix_lock_and_test_file
*/
- if (!(conflock = posix_test_lock(filp, &file_lock))) {
+ if (!posix_test_lock(filp, &file_lock, &conflock)) {
status = nfserr_serverfault;
goto out;
}
- nfs4_set_lock_denied(conflock, &lock->lk_denied);
+ nfs4_set_lock_denied(&conflock, &lock->lk_denied);
out:
if (status && lock->lk_is_new && lock_sop)
release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
struct inode *inode;
struct file file;
struct file_lock file_lock;
- struct file_lock *conflicting_lock;
+ struct file_lock conflock;
int status;
if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
file.f_dentry = current_fh->fh_dentry;
status = nfs_ok;
- conflicting_lock = posix_test_lock(&file, &file_lock);
- if (conflicting_lock) {
+ if (posix_test_lock(&file, &file_lock, &conflock)) {
status = nfserr_denied;
- nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+ nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
}
out:
nfs4_unlock_state();
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20feb7568de..8f1f49ceebe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,6 +104,7 @@ enum pid_directory_inos {
PROC_TGID_MAPS,
PROC_TGID_NUMA_MAPS,
PROC_TGID_MOUNTS,
+ PROC_TGID_MOUNTSTATS,
PROC_TGID_WCHAN,
#ifdef CONFIG_MMU
PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
PROC_TID_MAPS,
PROC_TID_NUMA_MAPS,
PROC_TID_MOUNTS,
+ PROC_TID_MOUNTSTATS,
PROC_TID_WCHAN,
#ifdef CONFIG_MMU
PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = {
E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
+ E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
#ifdef CONFIG_MMU
E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
#endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = {
.poll = mounts_poll,
};
+extern struct seq_operations mountstats_op;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+ struct task_struct *task = proc_task(inode);
+ int ret = seq_open(file, &mountstats_op);
+
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ struct namespace *namespace;
+ task_lock(task);
+ namespace = task->namespace;
+ if (namespace)
+ get_namespace(namespace);
+ task_unlock(task);
+
+ if (namespace)
+ m->private = namespace;
+ else {
+ seq_release(inode, file);
+ ret = -EINVAL;
+ }
+ }
+ return ret;
+}
+
+static struct file_operations proc_mountstats_operations = {
+ .open = mountstats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mounts_release,
+};
+
#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
inode->i_fop = &proc_smaps_operations;
break;
#endif
+ case PROC_TID_MOUNTSTATS:
+ case PROC_TGID_MOUNTSTATS:
+ inode->i_fop = &proc_mountstats_operations;
+ break;
#ifdef CONFIG_SECURITY
case PROC_TID_ATTR:
inode->i_nlink = 2;
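
The two new PROC_T*ID_MOUNTSTATS entries surface the per-mount statistics as /proc/<pid>/mountstats (owner-readable, seq_file based, sharing mounts_release with /proc/<pid>/mounts). A small userspace sketch, assuming a kernel with this patch applied, that simply dumps the file for the current process:

	/* Not part of the patch: userspace example reading the new file. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/self/mountstats", "r");
		char line[512];

		if (f == NULL) {
			perror("/proc/self/mountstats");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}
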
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21e8cf795c3..5adf32b90f3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -678,7 +678,6 @@ extern spinlock_t files_lock;
#define FL_POSIX 1
#define FL_FLOCK 2
#define FL_ACCESS 8 /* not trying to lock, just looking */
-#define FL_LOCKD 16 /* lock held by rpc.lockd */
#define FL_LEASE 32 /* lease held on this file */
#define FL_SLEEP 128 /* A blocking lock */
@@ -742,8 +741,6 @@ struct file_lock {
#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
#endif
-extern struct list_head file_lock_list;
-
#include <linux/fcntl.h>
extern int fcntl_getlk(struct file *, struct flock __user *);
@@ -765,10 +762,9 @@ extern void locks_init_lock(struct file_lock *);
extern void locks_copy_lock(struct file_lock *, struct file_lock *);
extern void locks_remove_posix(struct file *, fl_owner_t);
extern void locks_remove_flock(struct file *);
-extern struct file_lock *posix_test_lock(struct file *, struct file_lock *);
+extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *);
extern int posix_lock_file(struct file *, struct file_lock *);
extern int posix_lock_file_wait(struct file *, struct file_lock *);
-extern void posix_block_lock(struct file_lock *, struct file_lock *);
extern int posix_unblock_lock(struct file *, struct file_lock *);
extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
@@ -1097,6 +1093,7 @@ struct super_operations {
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct vfsmount *);
+ int (*show_stats)(struct seq_file *, struct vfsmount *);
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
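
The prototype change to posix_test_lock() above inverts ownership of the conflict information: instead of handing back a pointer to the conflicting lock, it copies the conflict into a caller-supplied struct file_lock and returns non-zero when one exists (the nfsd4_lock()/nfsd4_lockt() hunks earlier show the real conversions). As a stand-alone sketch of the new calling convention (the wrapper function and its return policy are hypothetical, not from the patch):

	/* Sketch only: testing for a conflicting POSIX lock with the new
	 * interface.  'filp' and 'fl' describe the lock being tested. */
	static int example_test_for_conflict(struct file *filp, struct file_lock *fl)
	{
		struct file_lock conflock;

		if (posix_test_lock(filp, fl, &conflock)) {
			/* Conflict details were copied into conflock; nothing in
			 * the kernel's lock lists is referenced after this returns. */
			return -EAGAIN;		/* hypothetical policy */
		}
		return 0;
	}
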
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ef21ed29603..995f89dc8c0 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -14,6 +14,7 @@
#include <linux/config.h>
#include <linux/in.h>
#include <linux/fs.h>
+#include <linux/kref.h>
#include <linux/utsname.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/lockd/bind.h>
@@ -58,6 +59,8 @@ struct nlm_host {
unsigned long h_expires; /* eligible for GC */
struct list_head h_lockowners; /* Lockowners for the client */
spinlock_t h_lock;
+ struct list_head h_granted; /* Locks in GRANTED state */
+ struct list_head h_reclaim; /* Locks in RECLAIM state */
};
/*
@@ -83,9 +86,9 @@ struct nlm_rqst {
struct nlm_host * a_host; /* host handle */
struct nlm_args a_args; /* arguments */
struct nlm_res a_res; /* result */
- struct nlm_wait * a_block;
+ struct nlm_block * a_block;
unsigned int a_retries; /* Retry count */
- char a_owner[NLMCLNT_OHSIZE];
+ u8 a_owner[NLMCLNT_OHSIZE];
};
/*
@@ -110,16 +113,16 @@ struct nlm_file {
*/
#define NLM_NEVER (~(unsigned long) 0)
struct nlm_block {
+ struct kref b_count; /* Reference count */
struct nlm_block * b_next; /* linked list (all blocks) */
struct nlm_block * b_fnext; /* linked list (per file) */
- struct nlm_rqst b_call; /* RPC args & callback info */
+ struct nlm_rqst * b_call; /* RPC args & callback info */
struct svc_serv * b_daemon; /* NLM service */
struct nlm_host * b_host; /* host handle for RPC clnt */
unsigned long b_when; /* next re-xmit */
unsigned int b_id; /* block id */
unsigned char b_queued; /* re-queued */
unsigned char b_granted; /* VFS granted lock */
- unsigned char b_incall; /* doing callback */
unsigned char b_done; /* callback complete */
struct nlm_file * b_file; /* file in question */
};
@@ -145,15 +148,16 @@ extern unsigned long nlmsvc_timeout;
/*
* Lockd client functions
*/
-struct nlm_rqst * nlmclnt_alloc_call(void);
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
-void nlmclnt_finish_block(struct nlm_rqst *req);
-long nlmclnt_block(struct nlm_rqst *req, long timeout);
+struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
+void nlm_release_call(struct nlm_rqst *);
+int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
+void nlmclnt_finish_block(struct nlm_wait *block);
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *);
void nlmclnt_recovery(struct nlm_host *, u32);
int nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
-int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *);
-void nlmclnt_freegrantargs(struct nlm_rqst *);
/*
* Host cache
@@ -172,7 +176,6 @@ extern struct nlm_host *nlm_find_client(void);
/*
* Server-side lock handling
*/
-int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
struct nlm_lock *, int, struct nlm_cookie *);
u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
@@ -180,7 +183,7 @@ u32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *,
struct nlm_lock *);
u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
unsigned long nlmsvc_retry_blocked(void);
-int nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
+void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
int action);
void nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32);
diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h
index 5d8aa325f14..c75a424ebe4 100644
--- a/include/linux/lockd/share.h
+++ b/include/linux/lockd/share.h
@@ -25,6 +25,6 @@ u32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
struct nlm_args *);
u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
struct nlm_args *);
-int nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int);
+void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int);
#endif /* LINUX_LOCKD_SHARE_H */
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d7a5cc4cfa9..bb0a0f1caa9 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -28,6 +28,7 @@ struct nlm_lock {
int len; /* length of "caller" */
struct nfs_fh fh;
struct xdr_netobj oh;
+ u32 svid;
struct file_lock fl;
};
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b4dc6e2e10c..cbebd7d1b9e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -56,9 +56,7 @@
* When flushing a cluster of dirty pages, there can be different
* strategies:
*/
-#define FLUSH_AGING 0 /* only flush old buffers */
#define FLUSH_SYNC 1 /* file being synced, or contention */
-#define FLUSH_WAIT 2 /* wait for completion */
#define FLUSH_STABLE 4 /* commit to stable storage */
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
@@ -78,6 +76,7 @@ struct nfs_access_entry {
struct nfs4_state;
struct nfs_open_context {
atomic_t count;
+ struct vfsmount *vfsmnt;
struct dentry *dentry;
struct rpc_cred *cred;
struct nfs4_state *state;
@@ -118,8 +117,7 @@ struct nfs_inode {
unsigned long cache_validity; /* bit mask */
/*
- * read_cache_jiffies is when we started read-caching this inode,
- * and read_cache_mtime is the mtime of the inode at that time.
+ * read_cache_jiffies is when we started read-caching this inode.
* attrtimeo is for how long the cached information is assumed
* to be valid. A successful attribute revalidation doubles
* attrtimeo (up to acregmax/acdirmax), a failure resets it to
@@ -128,11 +126,6 @@ struct nfs_inode {
* We need to revalidate the cached attrs for this inode if
*
* jiffies - read_cache_jiffies > attrtimeo
- *
- * and invalidate any cached data/flush out any dirty pages if
- * we find that
- *
- * mtime != read_cache_mtime
*/
unsigned long read_cache_jiffies;
unsigned long attrtimeo;
@@ -311,12 +304,9 @@ extern void nfs_begin_attr_update(struct inode *);
extern void nfs_end_attr_update(struct inode *);
extern void nfs_begin_data_update(struct inode *);
extern void nfs_end_data_update(struct inode *);
-extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
extern void put_nfs_open_context(struct nfs_open_context *ctx);
-extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
-extern void nfs_file_clear_open_context(struct file *filp);
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
extern u32 root_nfs_parse_addr(char *name); /*__init*/
@@ -415,21 +405,22 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc);
extern int nfs_writepages(struct address_space *, struct writeback_control *);
extern int nfs_flush_incompatible(struct file *file, struct page *page);
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *task, void *data);
-extern void nfs_writedata_release(void *data);
+extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+extern void nfs_writedata_release(void *);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern void nfs_commit_done(struct rpc_task *, void *data);
-extern void nfs_commit_release(void *data);
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+void nfs_commit_free(struct nfs_write_data *p);
#endif
/*
* Try to write back everything synchronously (but check the
* return value!)
*/
-extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
+extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
extern int nfs_commit_inode(struct inode *, int);
+extern void nfs_commit_release(void *wdata);
#else
static inline int
nfs_commit_inode(struct inode *inode, int how)
@@ -447,7 +438,7 @@ nfs_have_writebacks(struct inode *inode)
static inline int
nfs_wb_all(struct inode *inode)
{
- int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT);
+ int error = nfs_sync_inode_wait(inode, 0, 0, 0);
return (error < 0) ? error : 0;
}
@@ -456,8 +447,8 @@ nfs_wb_all(struct inode *inode)
*/
static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
{
- int error = nfs_sync_inode(inode, page->index, 1,
- how | FLUSH_WAIT | FLUSH_STABLE);
+ int error = nfs_sync_inode_wait(inode, page->index, 1,
+ how | FLUSH_STABLE);
return (error < 0) ? error : 0;
}
@@ -469,37 +460,8 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
/*
* Allocate and free nfs_write_data structures
*/
-extern mempool_t *nfs_wdata_mempool;
-
-static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
-{
- struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
-
- if (p) {
- memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&p->pages);
- if (pagecount < NFS_PAGEVEC_SIZE)
- p->pagevec = &p->page_array[0];
- else {
- size_t size = ++pagecount * sizeof(struct page *);
- p->pagevec = kmalloc(size, GFP_NOFS);
- if (p->pagevec) {
- memset(p->pagevec, 0, size);
- } else {
- mempool_free(p, nfs_wdata_mempool);
- p = NULL;
- }
- }
- }
- return p;
-}
-
-static inline void nfs_writedata_free(struct nfs_write_data *p)
-{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- mempool_free(p, nfs_wdata_mempool);
-}
+extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern void nfs_writedata_free(struct nfs_write_data *p);
/*
* linux/fs/nfs/read.c
@@ -507,44 +469,14 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
extern int nfs_readpage(struct file *, struct page *);
extern int nfs_readpages(struct file *, struct address_space *,
struct list_head *, unsigned);
-extern void nfs_readpage_result(struct rpc_task *, void *);
-extern void nfs_readdata_release(void *data);
-
+extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
+extern void nfs_readdata_release(void *data);
/*
* Allocate and free nfs_read_data structures
*/
-extern mempool_t *nfs_rdata_mempool;
-
-static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
-{
- struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
-
- if (p) {
- memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&p->pages);
- if (pagecount < NFS_PAGEVEC_SIZE)
- p->pagevec = &p->page_array[0];
- else {
- size_t size = ++pagecount * sizeof(struct page *);
- p->pagevec = kmalloc(size, GFP_NOFS);
- if (p->pagevec) {
- memset(p->pagevec, 0, size);
- } else {
- mempool_free(p, nfs_rdata_mempool);
- p = NULL;
- }
- }
- }
- return p;
-}
-
-static inline void nfs_readdata_free(struct nfs_read_data *p)
-{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- mempool_free(p, nfs_rdata_mempool);
-}
+extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern void nfs_readdata_free(struct nfs_read_data *p);
/*
* linux/fs/nfs3proc.c
diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h
index e2c18dabff8..861730275ba 100644
--- a/include/linux/nfs_fs_i.h
+++ b/include/linux/nfs_fs_i.h
@@ -12,8 +12,8 @@ struct nlm_lockowner;
*/
struct nfs_lock_info {
u32 state;
- u32 flags;
struct nlm_lockowner *owner;
+ struct list_head list;
};
struct nfs4_lock_state;
@@ -21,10 +21,4 @@ struct nfs4_lock_info {
struct nfs4_lock_state *owner;
};
-/*
- * Lock flag values
- */
-#define NFS_LCK_GRANTED 0x0001 /* lock has been granted */
-#define NFS_LCK_RECLAIM 0x0002 /* lock marked for reclaiming */
-
#endif
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3d3a305488c..65dec21af77 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -4,6 +4,8 @@
#include <linux/list.h>
#include <linux/backing-dev.h>
+struct nfs_iostats;
+
/*
* NFS client parameters stored in the superblock.
*/
@@ -12,6 +14,7 @@ struct nfs_server {
struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */
struct rpc_clnt * client_acl; /* ACL RPC client handle */
struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */
+ struct nfs_iostats * io_stats; /* I/O statistics */
struct backing_dev_info backing_dev_info;
int flags; /* various flags */
unsigned int caps; /* server capabilities */
@@ -26,10 +29,13 @@ struct nfs_server {
unsigned int acregmax;
unsigned int acdirmin;
unsigned int acdirmax;
+ unsigned long retrans_timeo; /* retransmit timeout */
+ unsigned int retrans_count; /* number of retransmit tries */
unsigned int namelen;
char * hostname; /* remote hostname */
struct nfs_fh fh;
struct sockaddr_in addr;
+ unsigned long mount_time; /* when this fs was mounted */
#ifdef CONFIG_NFS_V4
/* Our own IP address, as a null-terminated string.
* This is used to generate the clientid, and the callback address.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6d6f69ec567..7fafc4c546b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -695,7 +695,6 @@ struct nfs_read_data {
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
- void (*complete) (struct nfs_read_data *, int);
struct page *page_array[NFS_PAGEVEC_SIZE + 1];
};
@@ -714,7 +713,6 @@ struct nfs_write_data {
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
- void (*complete) (struct nfs_write_data *, int);
struct page *page_array[NFS_PAGEVEC_SIZE + 1];
};
@@ -769,8 +767,11 @@ struct nfs_rpc_ops {
struct nfs_pathconf *);
u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus);
void (*read_setup) (struct nfs_read_data *);
+ int (*read_done) (struct rpc_task *, struct nfs_read_data *);
void (*write_setup) (struct nfs_write_data *, int how);
+ int (*write_done) (struct rpc_task *, struct nfs_write_data *);
void (*commit_setup) (struct nfs_write_data *, int how);
+ int (*commit_done) (struct rpc_task *, struct nfs_write_data *);
int (*file_open) (struct inode *, struct file *);
int (*file_release) (struct inode *, struct file *);
int (*lock)(struct file *, int, struct file_lock *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f147e6b8433..8fe9f35eba3 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -45,7 +45,8 @@ struct rpc_clnt {
char * cl_server; /* server machine name */
char * cl_protname; /* protocol name */
struct rpc_auth * cl_auth; /* authenticator */
- struct rpc_stat * cl_stats; /* statistics */
+ struct rpc_stat * cl_stats; /* per-program statistics */
+ struct rpc_iostats * cl_metrics; /* per-client statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
cl_intr : 1,/* interruptible */
@@ -59,6 +60,7 @@ struct rpc_clnt {
int cl_nodelen; /* nodename length */
char cl_nodename[UNX_MAXNODENAME];
char cl_pathname[30];/* Path in rpc_pipe_fs */
+ struct vfsmount * cl_vfsmnt;
struct dentry * cl_dentry; /* inode */
struct rpc_clnt * cl_parent; /* Points to parent of clones */
struct rpc_rtt cl_rtt_default;
@@ -100,6 +102,8 @@ struct rpc_procinfo {
unsigned int p_bufsiz; /* req. buffer size */
unsigned int p_count; /* call count */
unsigned int p_timer; /* Which RTT timer to use */
+ u32 p_statidx; /* Which procedure to account */
+ char * p_name; /* name of procedure */
};
#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt))
@@ -137,20 +141,6 @@ size_t rpc_max_payload(struct rpc_clnt *);
void rpc_force_rebind(struct rpc_clnt *);
int rpc_ping(struct rpc_clnt *clnt, int flags);
-static __inline__
-int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
- struct rpc_message msg = {
- .rpc_proc = &clnt->cl_procinfo[proc],
- .rpc_argp = argp,
- .rpc_resp = resp,
- .rpc_cred = NULL
- };
- return rpc_call_sync(clnt, &msg, flags);
-}
-
-extern void rpciod_wake_up(void);
-
/*
* Helper function for NFSroot support
*/
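With the rpc_call() wrapper removed from the header, callers now build an rpc_message explicitly and go through rpc_call_sync(); the pmap_clnt.c hunks later in this patch show the conversion, whose general shape is roughly:

/* General shape of the conversion; example_* names are placeholders. */
static int example_do_call(struct rpc_clnt *clnt, void *argp, void *resp)
{
	struct rpc_message msg = {
		.rpc_proc = &example_procedures[EXAMPLE_PROC],
		.rpc_argp = argp,
		.rpc_resp = resp,
	};

	return rpc_call_sync(clnt, &msg, 0);
}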
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 2c3601d3104..1279280d719 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -53,6 +53,8 @@ struct krb5_ctx {
struct xdr_netobj mech_used;
};
+extern spinlock_t krb5_seq_lock;
+
#define KG_TOK_MIC_MSG 0x0101
#define KG_TOK_WRAP_MSG 0x0201
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
new file mode 100644
index 00000000000..8f96e9dc369
--- /dev/null
+++ b/include/linux/sunrpc/metrics.h
@@ -0,0 +1,77 @@
+/*
+ * linux/include/linux/sunrpc/metrics.h
+ *
+ * Declarations for RPC client per-operation metrics
+ *
+ * Copyright (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ * RPC client per-operation statistics provide latency and retry
+ * information about each type of RPC procedure in a given RPC program.
+ * These statistics are not for detailed problem diagnosis, but simply
+ * to indicate whether the problem is local or remote.
+ *
+ * These counters are not meant to be human-readable, but are meant to be
+ * integrated into system monitoring tools such as "sar" and "iostat". As
+ * such, the counters are sampled by the tools over time, and are never
+ * zeroed after a file system is mounted. Moving averages can be computed
+ * by the tools by taking the difference between two instantaneous samples
+ * and dividing that by the time between the samples.
+ *
+ * The counters are maintained in a single array per RPC client, indexed
+ * by procedure number. There is no need to maintain separate counter
+ * arrays per-CPU because these counters are always modified behind locks.
+ */
+
+#ifndef _LINUX_SUNRPC_METRICS_H
+#define _LINUX_SUNRPC_METRICS_H
+
+#include <linux/seq_file.h>
+
+#define RPC_IOSTATS_VERS "1.0"
+
+struct rpc_iostats {
+ /*
+ * These counters give an idea about how many request
+ * transmissions are required, on average, to complete that
+ * particular procedure. Some procedures may require more
+ * than one transmission because the server is unresponsive,
+ * the client is retransmitting too aggressively, or the
+ * requests are large and the network is congested.
+ */
+ unsigned long om_ops, /* count of operations */
+ om_ntrans, /* count of RPC transmissions */
+ om_timeouts; /* count of major timeouts */
+
+ /*
+ * These count how many bytes are sent and received for a
+ * given RPC procedure type. This indicates how much load a
+ * particular procedure is putting on the network. These
+ * counts include the RPC and ULP headers, and the request
+ * payload.
+ */
+ unsigned long long om_bytes_sent, /* count of bytes out */
+ om_bytes_recv; /* count of bytes in */
+
+ /*
+ * The length of time an RPC request waits in queue before
+ * transmission, the network + server latency of the request,
+ * and the total time the request spent from init to release
+ * are measured.
+ */
+ unsigned long long om_queue, /* jiffies queued for xmit */
+ om_rtt, /* jiffies for RPC RTT */
+ om_execute; /* jiffies for RPC execution */
+} ____cacheline_aligned;
+
+struct rpc_task;
+struct rpc_clnt;
+
+/*
+ * EXPORTed functions for managing rpc_iostats structures
+ */
+struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *);
+void rpc_count_iostats(struct rpc_task *);
+void rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void rpc_free_iostats(struct rpc_iostats *);
+
+#endif /* _LINUX_SUNRPC_METRICS_H */
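The header comment above notes that monitoring tools derive rates by differencing two snapshots and dividing by the sampling interval; a small illustrative helper (not part of the patch) for one counter slot:

/* Illustration only: mean RTT per operation over a sampling interval. */
static unsigned long long iostats_avg_rtt(const struct rpc_iostats *prev,
					  const struct rpc_iostats *cur)
{
	unsigned long ops = cur->om_ops - prev->om_ops;      /* ops completed in the interval */
	unsigned long long rtt = cur->om_rtt - prev->om_rtt; /* RTT jiffies accumulated in the interval */

	return ops ? rtt / ops : 0;                          /* average jiffies per operation */
}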
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 63929349571..2c2189cb30a 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -45,6 +45,8 @@ extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
extern int rpc_rmdir(char *);
extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
extern int rpc_unlink(char *);
+extern struct vfsmount *rpc_get_mount(void);
+extern void rpc_put_mount(void);
#endif
#endif
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8b25629accd..82a91bb2236 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -86,6 +86,12 @@ struct rpc_task {
struct work_struct tk_work; /* Async task work queue */
struct rpc_wait tk_wait; /* RPC wait */
} u;
+
+ unsigned short tk_timeouts; /* maj timeouts */
+ size_t tk_bytes_sent; /* total bytes sent */
+ unsigned long tk_start; /* RPC task init timestamp */
+ long tk_rtt; /* round-trip time (jiffies) */
+
#ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */
#endif
@@ -203,6 +209,7 @@ struct rpc_wait_queue {
unsigned char priority; /* current priority */
unsigned char count; /* # task groups remaining serviced so far */
unsigned char nr; /* # tasks remaining for cookie */
+ unsigned short qlen; /* total # tasks waiting in queue */
#ifdef RPC_DEBUG
const char * name;
#endif
@@ -269,13 +276,13 @@ void * rpc_malloc(struct rpc_task *, size_t);
void rpc_free(struct rpc_task *);
int rpciod_up(void);
void rpciod_down(void);
-void rpciod_wake_up(void);
int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
#ifdef RPC_DEBUG
void rpc_show_tasks(void);
#endif
int rpc_init_mempool(void);
void rpc_destroy_mempool(void);
+extern struct workqueue_struct *rpciod_workqueue;
static inline void rpc_exit(struct rpc_task *task, int status)
{
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6ef99b14ff0..7eebbab7160 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -114,6 +114,7 @@ struct rpc_xprt_ops {
void (*release_request)(struct rpc_task *task);
void (*close)(struct rpc_xprt *xprt);
void (*destroy)(struct rpc_xprt *xprt);
+ void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
};
struct rpc_xprt {
@@ -187,6 +188,18 @@ struct rpc_xprt {
struct list_head recv;
+ struct {
+ unsigned long bind_count, /* total number of binds */
+ connect_count, /* total number of connects */
+ connect_start, /* connect start timestamp */
+ connect_time, /* jiffies waiting for connect */
+ sends, /* how many complete requests */
+ recvs, /* how many complete requests */
+ bad_xids; /* lookup_rqst didn't find XID */
+
+ unsigned long long req_u, /* average requests on the wire */
+ bklog_u; /* backlog queue utilization */
+ } stat;
void (*old_data_ready)(struct sock *, int);
void (*old_state_change)(struct sock *);
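Each transport exposes these counters through the new print_stats method declared above (presumably implemented by the net/sunrpc/xprtsock.c changes at the end of this patch); a hedged sketch of what such a method can look like:

/* Sketch only: dump the new transport counters via ->print_stats(). */
static void example_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
	seq_printf(seq, "\txprt:\t%lu %lu %lu %lu %lu %lu %Lu %Lu\n",
		   xprt->stat.bind_count,
		   xprt->stat.connect_count,
		   xprt->stat.connect_time,
		   xprt->stat.sends,
		   xprt->stat.recvs,
		   xprt->stat.bad_xids,
		   xprt->stat.req_u,
		   xprt->stat.bklog_u);
}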
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8d6f1a176b1..55163af3dca 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -64,14 +64,26 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
struct rpc_authops *ops;
u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
- if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
- return ERR_PTR(-EINVAL);
+ auth = ERR_PTR(-EINVAL);
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ goto out;
+
+ /* FIXME - auth_flavors[] really needs an rw lock,
+ * and module refcounting. */
+#ifdef CONFIG_KMOD
+ if ((ops = auth_flavors[flavor]) == NULL)
+ request_module("rpc-auth-%u", flavor);
+#endif
+ if ((ops = auth_flavors[flavor]) == NULL)
+ goto out;
auth = ops->create(clnt, pseudoflavor);
if (IS_ERR(auth))
return auth;
if (clnt->cl_auth)
rpcauth_destroy(clnt->cl_auth);
clnt->cl_auth = auth;
+
+out:
return auth;
}
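The request_module() call above lets the matching flavour module load on demand, which only works if that module advertises the alias being requested; a sketch of the counterpart declaration (the flavour number 6, RPC_AUTH_GSS, is only an example):

/* In the auth flavour module itself, e.g. for flavour 6 (RPC_AUTH_GSS): */
MODULE_ALIAS("rpc-auth-6");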
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index bb46efd92e5..900ef31f5a0 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -721,6 +721,8 @@ gss_destroy(struct rpc_auth *auth)
gss_auth = container_of(auth, struct gss_auth, rpc_auth);
rpc_unlink(gss_auth->path);
+ dput(gss_auth->dentry);
+ gss_auth->dentry = NULL;
gss_mech_put(gss_auth->mech);
rpcauth_free_credcache(auth);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index d0dfdfd5e79..f43311221a7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,15 +70,19 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
+
u32
gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr, *krb5_hdr, *msg_start;
s32 now;
+ u32 seq_send;
dprintk("RPC: gss_krb5_seal\n");
@@ -133,16 +137,15 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
BUG();
}
- kfree(md5cksum.data);
+ spin_lock(&krb5_seq_lock);
+ seq_send = ctx->seq_send++;
+ spin_unlock(&krb5_seq_lock);
if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
- ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+ seq_send, krb5_hdr + 16, krb5_hdr + 8)))
goto out_err;
- ctx->seq_send++;
-
return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
out_err:
- kfree(md5cksum.data);
return GSS_S_FAILURE;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index db055fd7d77..0828cf64100 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -79,7 +79,8 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
int signalg;
int sealalg;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
s32 now;
int direction;
s32 seqnum;
@@ -176,6 +177,5 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
ret = GSS_S_COMPLETE;
out:
- kfree(md5cksum.data);
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index af777cf9f25..89d1f3e1412 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -121,12 +121,14 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
{
struct krb5_ctx *kctx = ctx->internal_ctx_id;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
int blocksize = 0, plainlen;
unsigned char *ptr, *krb5_hdr, *msg_start;
s32 now;
int headlen;
struct page **tmp_pages;
+ u32 seq_send;
dprintk("RPC: gss_wrap_kerberos\n");
@@ -205,23 +207,22 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
BUG();
}
- kfree(md5cksum.data);
+ spin_lock(&krb5_seq_lock);
+ seq_send = kctx->seq_send++;
+ spin_unlock(&krb5_seq_lock);
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
- kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+ seq_send, krb5_hdr + 16, krb5_hdr + 8)))
goto out_err;
if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
pages))
goto out_err;
- kctx->seq_send++;
-
return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
out_err:
- if (md5cksum.data) kfree(md5cksum.data);
return GSS_S_FAILURE;
}
@@ -232,7 +233,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
int signalg;
int sealalg;
s32 checksum_type;
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
s32 now;
int direction;
s32 seqnum;
@@ -358,6 +360,5 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
ret = GSS_S_COMPLETE;
out:
- if (md5cksum.data) kfree(md5cksum.data);
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 58400807d4d..5bf11ccba7c 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -102,6 +102,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
alg_mode = CRYPTO_TFM_MODE_CBC;
setkey = 1;
break;
+ case NID_cast5_cbc:
+ /* XXXX here in name only, not used */
+ alg_name = "cast5";
+ alg_mode = CRYPTO_TFM_MODE_CBC;
+ setkey = 0; /* XXX will need to set to 1 */
+ break;
case NID_md5:
if (key.len == 0) {
dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 86fbf7c3e39..18c7862bc23 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -57,7 +57,8 @@ spkm3_make_token(struct spkm3_ctx *ctx,
{
s32 checksum_type;
char tokhdrbuf[25];
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
int tokenlen = 0;
unsigned char *ptr;
@@ -115,13 +116,11 @@ spkm3_make_token(struct spkm3_ctx *ctx,
dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n");
goto out_err;
}
- kfree(md5cksum.data);
/* XXX need to implement sequence numbers, and ctx->expired */
return GSS_S_COMPLETE;
out_err:
- kfree(md5cksum.data);
token->data = NULL;
token->len = 0;
return GSS_S_FAILURE;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 96851b0ba1b..8537f581ef9 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -56,7 +56,8 @@ spkm3_read_token(struct spkm3_ctx *ctx,
{
s32 code;
struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
- struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
+ char cksumdata[16];
+ struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr = (unsigned char *)read_token->data;
unsigned char *cksum;
int bodysize, md5elen;
@@ -120,7 +121,6 @@ spkm3_read_token(struct spkm3_ctx *ctx,
/* XXX: need to add expiration and sequencing */
ret = GSS_S_COMPLETE;
out:
- kfree(md5cksum.data);
kfree(wire_cksum.data);
return ret;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7847978204..aa8965e9d30 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -28,12 +28,11 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/utsname.h>
+#include <linux/workqueue.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-
-#include <linux/nfs.h>
+#include <linux/sunrpc/metrics.h>
#define RPC_SLACK_SPACE (1024) /* total overkill */
@@ -71,8 +70,15 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
static uint32_t clntid;
int error;
+ clnt->cl_vfsmnt = ERR_PTR(-ENOENT);
+ clnt->cl_dentry = ERR_PTR(-ENOENT);
if (dir_name == NULL)
return 0;
+
+ clnt->cl_vfsmnt = rpc_get_mount();
+ if (IS_ERR(clnt->cl_vfsmnt))
+ return PTR_ERR(clnt->cl_vfsmnt);
+
for (;;) {
snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
"%s/clnt%x", dir_name,
@@ -85,6 +91,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
if (error != -EEXIST) {
printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
clnt->cl_pathname, error);
+ rpc_put_mount();
return error;
}
}
@@ -147,6 +154,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
clnt->cl_vers = version->number;
clnt->cl_prot = xprt->prot;
clnt->cl_stats = program->stats;
+ clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
if (!clnt->cl_port)
@@ -175,7 +183,11 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
return clnt;
out_no_auth:
- rpc_rmdir(clnt->cl_pathname);
+ if (!IS_ERR(clnt->cl_dentry)) {
+ rpc_rmdir(clnt->cl_pathname);
+ dput(clnt->cl_dentry);
+ rpc_put_mount();
+ }
out_no_path:
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
@@ -240,11 +252,15 @@ rpc_clone_client(struct rpc_clnt *clnt)
new->cl_autobind = 0;
new->cl_oneshot = 0;
new->cl_dead = 0;
+ if (!IS_ERR(new->cl_dentry)) {
+ dget(new->cl_dentry);
+ rpc_get_mount();
+ }
rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
if (new->cl_auth)
atomic_inc(&new->cl_auth->au_count);
new->cl_pmap = &new->cl_pmap_default;
- rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
+ new->cl_metrics = rpc_alloc_iostats(clnt);
return new;
out_no_clnt:
printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -314,6 +330,12 @@ rpc_destroy_client(struct rpc_clnt *clnt)
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
out_free:
+ rpc_free_iostats(clnt->cl_metrics);
+ clnt->cl_metrics = NULL;
+ if (!IS_ERR(clnt->cl_dentry)) {
+ dput(clnt->cl_dentry);
+ rpc_put_mount();
+ }
kfree(clnt);
return 0;
}
@@ -473,15 +495,16 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
int status;
/* If this client is slain all further I/O fails */
+ status = -EIO;
if (clnt->cl_dead)
- return -EIO;
+ goto out_release;
flags |= RPC_TASK_ASYNC;
/* Create/initialize a new RPC task */
status = -ENOMEM;
if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
- goto out;
+ goto out_release;
/* Mask signals on GSS_AUTH upcalls */
rpc_task_sigmask(task, &oldset);
@@ -496,7 +519,10 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
rpc_release_task(task);
rpc_restore_sigmask(&oldset);
-out:
+ return status;
+out_release:
+ if (tk_ops->rpc_release != NULL)
+ tk_ops->rpc_release(data);
return status;
}
@@ -993,6 +1019,8 @@ call_timeout(struct rpc_task *task)
}
dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
+ task->tk_timeouts++;
+
if (RPC_IS_SOFT(task)) {
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
clnt->cl_protname, clnt->cl_server);
@@ -1045,6 +1073,11 @@ call_decode(struct rpc_task *task)
return;
}
+ /*
+ * Ensure that we see all writes made by xprt_complete_rqst()
+ * before it changed req->rq_received.
+ */
+ smp_rmb();
req->rq_rcv_buf.len = req->rq_private_buf.len;
/* Check that the softirq receive buffer is valid */
@@ -1194,8 +1227,8 @@ call_verify(struct rpc_task *task)
task->tk_action = call_bind;
goto out_retry;
case RPC_AUTH_TOOWEAK:
- printk(KERN_NOTICE "call_verify: server requires stronger "
- "authentication.\n");
+ printk(KERN_NOTICE "call_verify: server %s requires stronger "
+ "authentication.\n", task->tk_client->cl_server);
break;
default:
printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 8139ce68e91..d25b054ec92 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -82,6 +82,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
rpc_call_setup(child, &msg, 0);
/* ... and run the child task */
+ task->tk_xprt->stat.bind_count++;
rpc_run_child(task, child, pmap_getport_done);
return;
@@ -103,6 +104,11 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
.pm_prot = prot,
.pm_port = 0
};
+ struct rpc_message msg = {
+ .rpc_proc = &pmap_procedures[PMAP_GETPORT],
+ .rpc_argp = &map,
+ .rpc_resp = &map.pm_port,
+ };
struct rpc_clnt *pmap_clnt;
char hostname[32];
int status;
@@ -116,7 +122,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
return PTR_ERR(pmap_clnt);
/* Setup the call info struct */
- status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0);
+ status = rpc_call_sync(pmap_clnt, &msg, 0);
if (status >= 0) {
if (map.pm_port != 0)
@@ -161,16 +167,27 @@ pmap_getport_done(struct rpc_task *task)
int
rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
{
- struct sockaddr_in sin;
- struct rpc_portmap map;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct rpc_portmap map = {
+ .pm_prog = prog,
+ .pm_vers = vers,
+ .pm_prot = prot,
+ .pm_port = port,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
+ .rpc_argp = &map,
+ .rpc_resp = okay,
+ };
struct rpc_clnt *pmap_clnt;
int error = 0;
dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
prog, vers, prot, port);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
if (IS_ERR(pmap_clnt)) {
error = PTR_ERR(pmap_clnt);
@@ -178,13 +195,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
return error;
}
- map.pm_prog = prog;
- map.pm_vers = vers;
- map.pm_prot = prot;
- map.pm_port = port;
-
- error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
- &map, okay, 0);
+ error = rpc_call_sync(pmap_clnt, &msg, 0);
if (error < 0) {
printk(KERN_WARNING
@@ -260,6 +271,8 @@ static struct rpc_procinfo pmap_procedures[] = {
.p_decode = (kxdrproc_t) xdr_decode_bool,
.p_bufsiz = 4,
.p_count = 1,
+ .p_statidx = PMAP_SET,
+ .p_name = "SET",
},
[PMAP_UNSET] = {
.p_proc = PMAP_UNSET,
@@ -267,6 +280,8 @@ static struct rpc_procinfo pmap_procedures[] = {
.p_decode = (kxdrproc_t) xdr_decode_bool,
.p_bufsiz = 4,
.p_count = 1,
+ .p_statidx = PMAP_UNSET,
+ .p_name = "UNSET",
},
[PMAP_GETPORT] = {
.p_proc = PMAP_GETPORT,
@@ -274,6 +289,8 @@ static struct rpc_procinfo pmap_procedures[] = {
.p_decode = (kxdrproc_t) xdr_decode_port,
.p_bufsiz = 4,
.p_count = 1,
+ .p_statidx = PMAP_GETPORT,
+ .p_name = "GETPORT",
},
};
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ad9d9fc4e73..aa4158be990 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -91,7 +91,8 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
res = 0;
} else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
if (list_empty(&rpci->pipe))
- schedule_delayed_work(&rpci->queue_timeout,
+ queue_delayed_work(rpciod_workqueue,
+ &rpci->queue_timeout,
RPC_UPCALL_TIMEOUT);
list_add_tail(&msg->list, &rpci->pipe);
rpci->pipelen += msg->len;
@@ -132,7 +133,7 @@ rpc_close_pipes(struct inode *inode)
if (ops->release_pipe)
ops->release_pipe(inode);
cancel_delayed_work(&rpci->queue_timeout);
- flush_scheduled_work();
+ flush_workqueue(rpciod_workqueue);
}
rpc_inode_setowner(inode, NULL);
mutex_unlock(&inode->i_mutex);
@@ -434,14 +435,17 @@ static struct rpc_filelist authfiles[] = {
},
};
-static int
-rpc_get_mount(void)
+struct vfsmount *rpc_get_mount(void)
{
- return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+ int err;
+
+ err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+ if (err != 0)
+ return ERR_PTR(err);
+ return rpc_mount;
}
-static void
-rpc_put_mount(void)
+void rpc_put_mount(void)
{
simple_release_fs(&rpc_mount, &rpc_mount_count);
}
@@ -451,12 +455,13 @@ rpc_lookup_parent(char *path, struct nameidata *nd)
{
if (path[0] == '\0')
return -ENOENT;
- if (rpc_get_mount()) {
+ nd->mnt = rpc_get_mount();
+ if (IS_ERR(nd->mnt)) {
printk(KERN_WARNING "%s: %s failed to mount "
"pseudofilesystem \n", __FILE__, __FUNCTION__);
- return -ENODEV;
+ return PTR_ERR(nd->mnt);
}
- nd->mnt = mntget(rpc_mount);
+ mntget(nd->mnt);
nd->dentry = dget(rpc_mount->mnt_root);
nd->last_type = LAST_ROOT;
nd->flags = LOOKUP_PARENT;
@@ -593,7 +598,6 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
d_instantiate(dentry, inode);
dir->i_nlink++;
inode_dir_notify(dir, DN_CREATE);
- rpc_get_mount();
return 0;
out_err:
printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -614,7 +618,6 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
if (!error) {
inode_dir_notify(dir, DN_DELETE);
d_drop(dentry);
- rpc_put_mount();
}
return 0;
}
@@ -668,7 +671,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
out:
mutex_unlock(&dir->i_mutex);
rpc_release_path(&nd);
- return dentry;
+ return dget(dentry);
err_depopulate:
rpc_depopulate(dentry);
__rpc_rmdir(dir, dentry);
@@ -732,7 +735,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
out:
mutex_unlock(&dir->i_mutex);
rpc_release_path(&nd);
- return dentry;
+ return dget(dentry);
err_dput:
dput(dentry);
dentry = ERR_PTR(-ENOMEM);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index dff07795bd1..b9969b91a9f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -65,7 +65,7 @@ static LIST_HEAD(all_tasks);
*/
static DEFINE_MUTEX(rpciod_mutex);
static unsigned int rpciod_users;
-static struct workqueue_struct *rpciod_workqueue;
+struct workqueue_struct *rpciod_workqueue;
/*
* Spinlock for other critical sections of code.
@@ -182,6 +182,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->u.tk_wait.rpc_waitq = queue;
+ queue->qlen++;
rpc_set_queued(task);
dprintk("RPC: %4d added to queue %p \"%s\"\n",
@@ -216,6 +217,7 @@ static void __rpc_remove_wait_queue(struct rpc_task *task)
__rpc_remove_wait_queue_priority(task);
else
list_del(&task->u.tk_wait.list);
+ queue->qlen--;
dprintk("RPC: %4d removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
}
@@ -816,6 +818,9 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
BUG_ON(task->tk_ops == NULL);
+ /* starting timestamp */
+ task->tk_start = jiffies;
+
dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
current->pid);
}
@@ -917,8 +922,11 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
{
struct rpc_task *task;
task = rpc_new_task(clnt, flags, ops, data);
- if (task == NULL)
+ if (task == NULL) {
+ if (ops->rpc_release != NULL)
+ ops->rpc_release(data);
return ERR_PTR(-ENOMEM);
+ }
atomic_inc(&task->tk_count);
rpc_execute(task);
return task;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 4979f226e28..790941e8af4 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/metrics.h>
#define RPCDBG_FACILITY RPCDBG_MISC
@@ -106,6 +107,120 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
}
}
+/**
+ * rpc_alloc_iostats - allocate an rpc_iostats structure
+ * @clnt: RPC program, version, and xprt
+ *
+ */
+struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
+{
+ unsigned int ops = clnt->cl_maxproc;
+ size_t size = ops * sizeof(struct rpc_iostats);
+ struct rpc_iostats *new;
+
+ new = kmalloc(size, GFP_KERNEL);
+ if (new)
+ memset(new, 0 , size);
+ return new;
+}
+EXPORT_SYMBOL(rpc_alloc_iostats);
+
+/**
+ * rpc_free_iostats - release an rpc_iostats structure
+ * @stats: doomed rpc_iostats structure
+ *
+ */
+void rpc_free_iostats(struct rpc_iostats *stats)
+{
+ kfree(stats);
+}
+EXPORT_SYMBOL(rpc_free_iostats);
+
+/**
+ * rpc_count_iostats - tally up per-task stats
+ * @task: completed rpc_task
+ *
+ * Relies on the caller for serialization.
+ */
+void rpc_count_iostats(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_iostats *stats = task->tk_client->cl_metrics;
+ struct rpc_iostats *op_metrics;
+ long rtt, execute, queue;
+
+ if (!stats || !req)
+ return;
+ op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
+
+ op_metrics->om_ops++;
+ op_metrics->om_ntrans += req->rq_ntrans;
+ op_metrics->om_timeouts += task->tk_timeouts;
+
+ op_metrics->om_bytes_sent += task->tk_bytes_sent;
+ op_metrics->om_bytes_recv += req->rq_received;
+
+ queue = (long)req->rq_xtime - task->tk_start;
+ if (queue < 0)
+ queue = -queue;
+ op_metrics->om_queue += queue;
+
+ rtt = task->tk_rtt;
+ if (rtt < 0)
+ rtt = -rtt;
+ op_metrics->om_rtt += rtt;
+
+ execute = (long)jiffies - task->tk_start;
+ if (execute < 0)
+ execute = -execute;
+ op_metrics->om_execute += execute;
+}
+
+void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs)
+{
+ if (procs[op].p_name)
+ seq_printf(seq, "\t%12s: ", procs[op].p_name);
+ else if (op == 0)
+ seq_printf(seq, "\t NULL: ");
+ else
+ seq_printf(seq, "\t%12u: ", op);
+}
+
+#define MILLISECS_PER_JIFFY (1000 / HZ)
+
+void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+{
+ struct rpc_iostats *stats = clnt->cl_metrics;
+ struct rpc_xprt *xprt = clnt->cl_xprt;
+ unsigned int op, maxproc = clnt->cl_maxproc;
+
+ if (!stats)
+ return;
+
+ seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
+ seq_printf(seq, "p/v: %u/%u (%s)\n",
+ clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+
+ if (xprt)
+ xprt->ops->print_stats(xprt, seq);
+
+ seq_printf(seq, "\tper-op statistics\n");
+ for (op = 0; op < maxproc; op++) {
+ struct rpc_iostats *metrics = &stats[op];
+ _print_name(seq, op, clnt->cl_procinfo);
+ seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+ metrics->om_ops,
+ metrics->om_ntrans,
+ metrics->om_timeouts,
+ metrics->om_bytes_sent,
+ metrics->om_bytes_recv,
+ metrics->om_queue * MILLISECS_PER_JIFFY,
+ metrics->om_rtt * MILLISECS_PER_JIFFY,
+ metrics->om_execute * MILLISECS_PER_JIFFY);
+ }
+}
+EXPORT_SYMBOL(rpc_print_iostats);
+
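For readers of the resulting /proc text, the eight per-op fields printed by the loop above are, in order:

/*
 * Per-op line layout (one line per procedure):
 *   1. om_ops         operations issued
 *   2. om_ntrans      RPC transmissions, including retransmits
 *   3. om_timeouts    major timeouts
 *   4. om_bytes_sent  bytes sent, including RPC and ULP headers
 *   5. om_bytes_recv  bytes received
 *   6. om_queue       time queued for transmit, in milliseconds
 *   7. om_rtt         network + server round-trip time, in milliseconds
 *   8. om_execute     total time from init to release, in milliseconds
 */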
/*
* Register/unregister RPC proc files
*/
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8ff2c8acb22..4dd5b3cfe75 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -44,13 +44,13 @@
#include <linux/random.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
/*
* Local variables
*/
#ifdef RPC_DEBUG
-# undef RPC_DEBUG_DATA
# define RPCDBG_FACILITY RPCDBG_XPRT
#endif
@@ -548,6 +548,7 @@ void xprt_connect(struct rpc_task *task)
task->tk_timeout = xprt->connect_timeout;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
+ xprt->stat.connect_start = jiffies;
xprt->ops->connect(task);
}
return;
@@ -558,6 +559,8 @@ static void xprt_connect_status(struct rpc_task *task)
struct rpc_xprt *xprt = task->tk_xprt;
if (task->tk_status >= 0) {
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
dprintk("RPC: %4d xprt_connect_status: connection established\n",
task->tk_pid);
return;
@@ -601,16 +604,14 @@ static void xprt_connect_status(struct rpc_task *task)
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
{
struct list_head *pos;
- struct rpc_rqst *req = NULL;
list_for_each(pos, &xprt->recv) {
struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
- if (entry->rq_xid == xid) {
- req = entry;
- break;
- }
+ if (entry->rq_xid == xid)
+ return entry;
}
- return req;
+ xprt->stat.bad_xids++;
+ return NULL;
}
/**
@@ -646,7 +647,12 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
task->tk_pid, ntohl(req->rq_xid), copied);
+ task->tk_xprt->stat.recvs++;
+ task->tk_rtt = (long)jiffies - req->rq_xtime;
+
list_del_init(&req->rq_list);
+ /* Ensure all writes are done before we update req->rq_received */
+ smp_wmb();
req->rq_received = req->rq_private_buf.len = copied;
rpc_wake_up_task(task);
}
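The smp_wmb() added here pairs with the smp_rmb() added to call_decode() earlier in this patch: the receive buffer updates must be visible before rq_received changes, and the woken task must not read the buffer until it has seen the new rq_received. Reduced to a generic sketch:

/* Generic publish/consume pattern that the two barriers implement. */
static int data_ready;
static int data;

static void producer(int value)
{
	data = value;
	smp_wmb();		/* order the data store before the flag store */
	data_ready = 1;
}

static int consumer(void)
{
	if (!data_ready)
		return -EAGAIN;
	smp_rmb();		/* order the flag load before the data load */
	return data;
}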
@@ -723,7 +729,6 @@ void xprt_transmit(struct rpc_task *task)
dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
- smp_rmb();
if (!req->rq_received) {
if (list_empty(&req->rq_list)) {
spin_lock_bh(&xprt->transport_lock);
@@ -744,12 +749,19 @@ void xprt_transmit(struct rpc_task *task)
if (status == 0) {
dprintk("RPC: %4d xmit complete\n", task->tk_pid);
spin_lock_bh(&xprt->transport_lock);
+
xprt->ops->set_retrans_timeout(task);
+
+ xprt->stat.sends++;
+ xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
+ xprt->stat.bklog_u += xprt->backlog.qlen;
+
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
+
xprt->ops->release_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
return;
@@ -848,6 +860,7 @@ void xprt_release(struct rpc_task *task)
if (!(req = task->tk_rqstp))
return;
+ rpc_count_iostats(task);
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c