aboutsummaryrefslogtreecommitdiff
path: root/net/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph')
-rw-r--r--net/ceph/messenger.c108
-rw-r--r--net/ceph/osd_client.c38
-rw-r--r--net/ceph/osdmap.c13
3 files changed, 114 insertions, 45 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 05f357828a2..78b55f49de7 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss)
break;
default:
- sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family);
+ snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)",
+ (int)ss->ss_family);
}
return s;
@@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con)
* Connection negotiation.
*/
-static void prepare_connect_authorizer(struct ceph_connection *con)
+static int prepare_connect_authorizer(struct ceph_connection *con)
{
void *auth_buf;
int auth_len = 0;
@@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con)
con->auth_retry);
mutex_lock(&con->mutex);
+ if (test_bit(CLOSED, &con->state) ||
+ test_bit(OPENING, &con->state))
+ return -EAGAIN;
+
con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
con->out_connect.authorizer_len = cpu_to_le32(auth_len);
- con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
- con->out_kvec[con->out_kvec_left].iov_len = auth_len;
- con->out_kvec_left++;
- con->out_kvec_bytes += auth_len;
+ if (auth_len) {
+ con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
+ con->out_kvec[con->out_kvec_left].iov_len = auth_len;
+ con->out_kvec_left++;
+ con->out_kvec_bytes += auth_len;
+ }
+ return 0;
}
/*
@@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr,
set_bit(WRITE_PENDING, &con->state);
}
-static void prepare_write_connect(struct ceph_messenger *msgr,
- struct ceph_connection *con,
- int after_banner)
+static int prepare_write_connect(struct ceph_messenger *msgr,
+ struct ceph_connection *con,
+ int after_banner)
{
unsigned global_seq = get_global_seq(con->msgr, 0);
int proto;
@@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
con->out_more = 0;
set_bit(WRITE_PENDING, &con->state);
- prepare_connect_authorizer(con);
+ return prepare_connect_authorizer(con);
}
@@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
switch (ss->ss_family) {
case AF_INET:
((struct sockaddr_in *)ss)->sin_port = htons(p);
+ break;
case AF_INET6:
((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
+ break;
}
}
@@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con)
u64 sup_feat = con->msgr->supported_features;
u64 req_feat = con->msgr->required_features;
u64 server_feat = le64_to_cpu(con->in_reply.features);
+ int ret;
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
@@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con)
return -1;
}
con->auth_retry = 1;
- prepare_write_connect(con->msgr, con, 0);
+ ret = prepare_write_connect(con->msgr, con, 0);
+ if (ret < 0)
+ return ret;
prepare_read_connect(con);
break;
@@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con)
if (con->ops->peer_reset)
con->ops->peer_reset(con);
mutex_lock(&con->mutex);
+ if (test_bit(CLOSED, &con->state) ||
+ test_bit(OPENING, &con->state))
+ return -EAGAIN;
break;
case CEPH_MSGR_TAG_RETRY_SESSION:
@@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con)
* to WAIT. This shouldn't happen if we are the
* client.
*/
- pr_err("process_connect peer connecting WAIT\n");
+ pr_err("process_connect got WAIT as client\n");
+ con->error_msg = "protocol error, got WAIT as client";
+ return -1;
default:
pr_err("connect protocol error, will retry\n");
@@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con)
more:
dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
con->in_base_pos);
+
+ /*
+ * process_connect and process_message drop and re-take
+ * con->mutex. make sure we handle a racing close or reopen.
+ */
+ if (test_bit(CLOSED, &con->state) ||
+ test_bit(OPENING, &con->state)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
if (test_bit(CONNECTING, &con->state)) {
if (!test_bit(NEGOTIATING, &con->state)) {
dout("try_read connecting\n");
@@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work)
{
struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work);
+ int ret;
mutex_lock(&con->mutex);
+restart:
if (test_and_clear_bit(BACKOFF, &con->state)) {
dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
@@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work)
con_close_socket(con);
}
- if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
- try_read(con) < 0 ||
- try_write(con) < 0) {
- mutex_unlock(&con->mutex);
- ceph_fault(con); /* error/fault path */
- goto done_unlocked;
- }
+ if (test_and_clear_bit(SOCK_CLOSED, &con->state))
+ goto fault;
+
+ ret = try_read(con);
+ if (ret == -EAGAIN)
+ goto restart;
+ if (ret < 0)
+ goto fault;
+
+ ret = try_write(con);
+ if (ret == -EAGAIN)
+ goto restart;
+ if (ret < 0)
+ goto fault;
done:
mutex_unlock(&con->mutex);
done_unlocked:
con->ops->put(con);
+ return;
+
+fault:
+ mutex_unlock(&con->mutex);
+ ceph_fault(con); /* error/fault path */
+ goto done_unlocked;
}
@@ -2267,6 +2311,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
m->more_to_follow = false;
m->pool = NULL;
+ /* middle */
+ m->middle = NULL;
+
+ /* data */
+ m->nr_pages = 0;
+ m->page_alignment = 0;
+ m->pages = NULL;
+ m->pagelist = NULL;
+ m->bio = NULL;
+ m->bio_iter = NULL;
+ m->bio_seg = 0;
+ m->trail = NULL;
+
/* front */
if (front_len) {
if (front_len > PAGE_CACHE_SIZE) {
@@ -2286,19 +2343,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
}
m->front.iov_len = front_len;
- /* middle */
- m->middle = NULL;
-
- /* data */
- m->nr_pages = 0;
- m->page_alignment = 0;
- m->pages = NULL;
- m->pagelist = NULL;
- m->bio = NULL;
- m->bio_iter = NULL;
- m->bio_seg = 0;
- m->trail = NULL;
-
dout("ceph_msg_new %p front %d\n", m, front_len);
return m;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5a80f41c0cb..9cb627a4073 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc,
ceph_calc_raw_layout(osdc, layout, vino.snap, off,
plen, &bno, req, op);
- sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno);
+ snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
req->r_oid_len = strlen(req->r_oid);
}
@@ -470,8 +470,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
snapc, ops,
use_mempool,
GFP_NOFS, NULL, NULL);
- if (IS_ERR(req))
- return req;
+ if (!req)
+ return NULL;
/* calculate max write size */
calc_layout(osdc, vino, layout, off, plen, req, ops);
@@ -1144,6 +1144,13 @@ static void handle_osds_timeout(struct work_struct *work)
round_jiffies_relative(delay));
}
+static void complete_request(struct ceph_osd_request *req)
+{
+ if (req->r_safe_callback)
+ req->r_safe_callback(req, NULL);
+ complete_all(&req->r_safe_completion); /* fsync waiter */
+}
+
/*
* handle osd op reply. either call the callback if it is specified,
* or do the completion to wake up the waiting thread.
@@ -1226,11 +1233,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
else
complete_all(&req->r_completion);
- if (flags & CEPH_OSD_FLAG_ONDISK) {
- if (req->r_safe_callback)
- req->r_safe_callback(req, msg);
- complete_all(&req->r_safe_completion); /* fsync waiter */
- }
+ if (flags & CEPH_OSD_FLAG_ONDISK)
+ complete_request(req);
done:
dout("req=%p req->r_linger=%d\n", req, req->r_linger);
@@ -1421,6 +1425,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
done:
downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
+
+ /*
+ * subscribe to subsequent osdmap updates if full to ensure
+ * we find out when we are no longer full and stop returning
+ * ENOSPC.
+ */
+ if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
+ ceph_monc_request_next_osdmap(&osdc->client->monc);
+
send_queued(osdc);
up_read(&osdc->map_sem);
wake_up_all(&osdc->client->auth_wq);
@@ -1677,8 +1690,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
*/
if (req->r_sent == 0) {
rc = __map_request(osdc, req);
- if (rc < 0)
+ if (rc < 0) {
+ if (nofail) {
+ dout("osdc_start_request failed map, "
+ " will retry %lld\n", req->r_tid);
+ rc = 0;
+ }
goto out_unlock;
+ }
if (req->r_osd == NULL) {
dout("send_request %p no up osds in pg\n", req);
ceph_monc_request_next_osdmap(&osdc->client->monc);
@@ -1717,6 +1736,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
__cancel_request(req);
__unregister_request(osdc, req);
mutex_unlock(&osdc->request_mutex);
+ complete_request(req);
dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
return rc;
}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 71603ac3dff..e97c3588c3e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
map->epoch++;
- map->modified = map->modified;
+ map->modified = modified;
if (newcrush) {
if (map->crush)
crush_destroy(map->crush);
@@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
map->osd_addr[osd] = addr;
}
- /* new_down */
+ /* new_state */
ceph_decode_32_safe(p, end, len, bad);
while (len--) {
u32 osd;
+ u8 xorstate;
ceph_decode_32_safe(p, end, osd, bad);
+ xorstate = **(u8 **)p;
(*p)++; /* clean flag */
- pr_info("osd%d down\n", osd);
+ if (xorstate == 0)
+ xorstate = CEPH_OSD_UP;
+ if (xorstate & CEPH_OSD_UP)
+ pr_info("osd%d down\n", osd);
if (osd < map->max_osd)
- map->osd_state[osd] &= ~CEPH_OSD_UP;
+ map->osd_state[osd] ^= xorstate;
}
/* new_weight */