aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/nbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r--drivers/block/nbd.c148
1 files changed, 103 insertions, 45 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 061427a75d3..fb31b8ee437 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -78,6 +78,8 @@ static const char *ioctl_cmd_to_ascii(int cmd)
case NBD_SET_SOCK: return "set-sock";
case NBD_SET_BLKSIZE: return "set-blksize";
case NBD_SET_SIZE: return "set-size";
+ case NBD_SET_TIMEOUT: return "set-timeout";
+ case NBD_SET_FLAGS: return "set-flags";
case NBD_DO_IT: return "do-it";
case NBD_CLEAR_SOCK: return "clear-sock";
case NBD_CLEAR_QUE: return "clear-que";
@@ -96,6 +98,8 @@ static const char *nbdcmd_to_ascii(int cmd)
case NBD_CMD_READ: return "read";
case NBD_CMD_WRITE: return "write";
case NBD_CMD_DISC: return "disconnect";
+ case NBD_CMD_FLUSH: return "flush";
+ case NBD_CMD_TRIM: return "trim/discard";
}
return "invalid";
}
@@ -154,6 +158,7 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
struct msghdr msg;
struct kvec iov;
sigset_t blocked, oldset;
+ unsigned long pflags = current->flags;
if (unlikely(!sock)) {
dev_err(disk_to_dev(nbd->disk),
@@ -167,8 +172,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
siginitsetinv(&blocked, sigmask(SIGKILL));
sigprocmask(SIG_SETMASK, &blocked, &oldset);
+ current->flags |= PF_MEMALLOC;
do {
- sock->sk->sk_allocation = GFP_NOIO;
+ sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
iov.iov_base = buf;
iov.iov_len = size;
msg.msg_name = NULL;
@@ -214,6 +220,7 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
} while (size > 0);
sigprocmask(SIG_SETMASK, &oldset, NULL);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
return result;
}
@@ -236,10 +243,14 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
struct nbd_request request;
unsigned long size = blk_rq_bytes(req);
+ memset(&request, 0, sizeof(request));
request.magic = htonl(NBD_REQUEST_MAGIC);
request.type = htonl(nbd_cmd(req));
- request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
- request.len = htonl(size);
+
+ if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) {
+ request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
+ request.len = htonl(size);
+ }
memcpy(request.handle, &req, sizeof(req));
dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
@@ -257,18 +268,18 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
if (nbd_cmd(req) == NBD_CMD_WRITE) {
struct req_iterator iter;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
/*
* we are really probing at internals to determine
* whether to set MSG_MORE or not...
*/
rq_for_each_segment(bvec, req, iter) {
flags = 0;
- if (!rq_iter_last(req, iter))
+ if (!rq_iter_last(bvec, iter))
flags = MSG_MORE;
dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
- nbd->disk->disk_name, req, bvec->bv_len);
- result = sock_send_bvec(nbd, bvec, flags);
+ nbd->disk->disk_name, req, bvec.bv_len);
+ result = sock_send_bvec(nbd, &bvec, flags);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
@@ -364,10 +375,10 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
nbd->disk->disk_name, req);
if (nbd_cmd(req) == NBD_CMD_READ) {
struct req_iterator iter;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
rq_for_each_segment(bvec, req, iter) {
- result = sock_recv_bvec(nbd, bvec);
+ result = sock_recv_bvec(nbd, &bvec);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
@@ -375,7 +386,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
return req;
}
dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
- nbd->disk->disk_name, req, bvec->bv_len);
+ nbd->disk->disk_name, req, bvec.bv_len);
}
}
return req;
@@ -405,6 +416,7 @@ static int nbd_do_it(struct nbd_device *nbd)
BUG_ON(nbd->magic != NBD_MAGIC);
+ sk_set_memalloc(nbd->sock->sk);
nbd->pid = task_pid_nr(current);
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
@@ -445,6 +457,14 @@ static void nbd_clear_que(struct nbd_device *nbd)
req->errors++;
nbd_end_request(req);
}
+
+ while (!list_empty(&nbd->waiting_queue)) {
+ req = list_entry(nbd->waiting_queue.next, struct request,
+ queuelist);
+ list_del_init(&req->queuelist);
+ req->errors++;
+ nbd_end_request(req);
+ }
}
@@ -455,14 +475,23 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
nbd_cmd(req) = NBD_CMD_READ;
if (rq_data_dir(req) == WRITE) {
- nbd_cmd(req) = NBD_CMD_WRITE;
- if (nbd->flags & NBD_READ_ONLY) {
+ if ((req->cmd_flags & REQ_DISCARD)) {
+ WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM));
+ nbd_cmd(req) = NBD_CMD_TRIM;
+ } else
+ nbd_cmd(req) = NBD_CMD_WRITE;
+ if (nbd->flags & NBD_FLAG_READ_ONLY) {
dev_err(disk_to_dev(nbd->disk),
"Write on read-only\n");
goto error_out;
}
}
+ if (req->cmd_flags & REQ_FLUSH) {
+ BUG_ON(unlikely(blk_rq_sectors(req)));
+ nbd_cmd(req) = NBD_CMD_FLUSH;
+ }
+
req->errors = 0;
mutex_lock(&nbd->tx_lock);
@@ -481,7 +510,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
nbd_end_request(req);
} else {
spin_lock(&nbd->queue_lock);
- list_add(&req->queuelist, &nbd->queue_head);
+ list_add_tail(&req->queuelist, &nbd->queue_head);
spin_unlock(&nbd->queue_lock);
}
@@ -501,7 +530,7 @@ static int nbd_thread(void *data)
struct nbd_device *nbd = data;
struct request *req;
- set_user_nice(current, -20);
+ set_user_nice(current, MIN_NICE);
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
/* wait for something to do */
wait_event_interruptible(nbd->waiting_wq,
@@ -532,6 +561,7 @@ static int nbd_thread(void *data)
*/
static void do_nbd_request(struct request_queue *q)
+ __releases(q->queue_lock) __acquires(q->queue_lock)
{
struct request *req;
@@ -576,45 +606,50 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
struct request sreq;
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
+ if (!nbd->sock)
+ return -EINVAL;
+ mutex_unlock(&nbd->tx_lock);
+ fsync_bdev(bdev);
+ mutex_lock(&nbd->tx_lock);
blk_rq_init(NULL, &sreq);
sreq.cmd_type = REQ_TYPE_SPECIAL;
nbd_cmd(&sreq) = NBD_CMD_DISC;
+
+ /* Check again after getting mutex back. */
if (!nbd->sock)
return -EINVAL;
+
+ nbd->disconnect = 1;
+
nbd_send_req(nbd, &sreq);
- return 0;
+ return 0;
}
case NBD_CLEAR_SOCK: {
- struct file *file;
-
+ struct socket *sock = nbd->sock;
nbd->sock = NULL;
- file = nbd->file;
- nbd->file = NULL;
nbd_clear_que(nbd);
BUG_ON(!list_empty(&nbd->queue_head));
- if (file)
- fput(file);
+ BUG_ON(!list_empty(&nbd->waiting_queue));
+ kill_bdev(bdev);
+ if (sock)
+ sockfd_put(sock);
return 0;
}
case NBD_SET_SOCK: {
- struct file *file;
- if (nbd->file)
+ struct socket *sock;
+ int err;
+ if (nbd->sock)
return -EBUSY;
- file = fget(arg);
- if (file) {
- struct inode *inode = file->f_path.dentry->d_inode;
- if (S_ISSOCK(inode->i_mode)) {
- nbd->file = file;
- nbd->sock = SOCKET_I(inode);
- if (max_part > 0)
- bdev->bd_invalidated = 1;
- return 0;
- } else {
- fput(file);
- }
+ sock = sockfd_lookup(arg, &err);
+ if (sock) {
+ nbd->sock = sock;
+ if (max_part > 0)
+ bdev->bd_invalidated = 1;
+ nbd->disconnect = 0; /* we're connected now */
+ return 0;
}
return -EINVAL;
}
@@ -638,6 +673,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
nbd->xmit_timeout = arg * HZ;
return 0;
+ case NBD_SET_FLAGS:
+ nbd->flags = arg;
+ return 0;
+
case NBD_SET_SIZE_BLOCKS:
nbd->bytesize = ((u64) arg) * nbd->blksize;
bdev->bd_inode->i_size = nbd->bytesize;
@@ -647,17 +686,28 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_DO_IT: {
struct task_struct *thread;
- struct file *file;
+ struct socket *sock;
int error;
if (nbd->pid)
return -EBUSY;
- if (!nbd->file)
+ if (!nbd->sock)
return -EINVAL;
mutex_unlock(&nbd->tx_lock);
- thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name);
+ if (nbd->flags & NBD_FLAG_READ_ONLY)
+ set_device_ro(bdev, true);
+ if (nbd->flags & NBD_FLAG_SEND_TRIM)
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+ nbd->disk->queue);
+ if (nbd->flags & NBD_FLAG_SEND_FLUSH)
+ blk_queue_flush(nbd->disk->queue, REQ_FLUSH);
+ else
+ blk_queue_flush(nbd->disk->queue, 0);
+
+ thread = kthread_create(nbd_thread, nbd, "%s",
+ nbd->disk->disk_name);
if (IS_ERR(thread)) {
mutex_lock(&nbd->tx_lock);
return PTR_ERR(thread);
@@ -670,17 +720,23 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
if (error)
return error;
sock_shutdown(nbd, 0);
- file = nbd->file;
- nbd->file = NULL;
+ sock = nbd->sock;
+ nbd->sock = NULL;
nbd_clear_que(nbd);
dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
- if (file)
- fput(file);
+ kill_bdev(bdev);
+ queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+ set_device_ro(bdev, false);
+ if (sock)
+ sockfd_put(sock);
+ nbd->flags = 0;
nbd->bytesize = 0;
bdev->bd_inode->i_size = 0;
set_capacity(nbd->disk, 0);
if (max_part > 0)
ioctl_by_bdev(bdev, BLKRRPART, 0);
+ if (nbd->disconnect) /* user requested, ignore socket errors */
+ return 0;
return nbd->harderror;
}
@@ -689,7 +745,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
* This is for compatibility only. The queue is always cleared
* by NBD_DO_IT or NBD_CLEAR_SOCK.
*/
- BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head));
return 0;
case NBD_PRINT_DEBUG:
@@ -792,6 +847,11 @@ static int __init nbd_init(void)
* Tell the block layer that we are not a rotational device
*/
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
+ disk->queue->limits.discard_granularity = 512;
+ disk->queue->limits.max_discard_sectors = UINT_MAX;
+ disk->queue->limits.discard_zeroes_data = 0;
+ blk_queue_max_hw_sectors(disk->queue, 65536);
+ disk->queue->limits.max_sectors = 256;
}
if (register_blkdev(NBD_MAJOR, "nbd")) {
@@ -804,9 +864,7 @@ static int __init nbd_init(void)
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = nbd_dev[i].disk;
- nbd_dev[i].file = NULL;
nbd_dev[i].magic = NBD_MAGIC;
- nbd_dev[i].flags = 0;
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
spin_lock_init(&nbd_dev[i].queue_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);