/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
/*
* lowcomms.c
*
* This is the "low-level" comms layer.
*
* It is responsible for sending/receiving messages
* from other nodes in the cluster.
*
* Cluster nodes are referred to by their nodeids. nodeids are
* simply 32 bit numbers to the locking module - if they need to
* be expanded for the cluster infrastructure then that is it's
* responsibility. It is this layer's
* responsibility to resolve these into IP address or
* whatever it needs for inter-node communication.
*
* The comms level is two kernel threads that deal mainly with
* the receiving of messages from other nodes and passing them
* up to the mid-level comms layer (which understands the
* message format) for execution by the locking core, and
* a send thread which does all the setting up of connections
* to remote nodes and the sending of data. Threads are not allowed
* to send their own data because it may cause them to wait in times
* of high load. Also, this way, the sending thread can collect together
* messages bound for one node and send them in one block.
*
* I don't see any problem with the recv thread executing the locking
* code on behalf of remote processes as the locking code is
* short, efficient and never (well, hardly ever) waits.
*
*/
#include <asm/ioctls.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/sctp/user.h>
#include <linux/pagemap.h>
#include <linux/socket.h>
#include <linux/idr.h>
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "midcomms.h"
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count;
static int dlm_local_nodeid;
/* One of these per connected node */
#define NI_INIT_PENDING 1
#define NI_WRITE_PENDING 2
struct nodeinfo {
spinlock_t lock;
sctp_assoc_t assoc_id;
unsigned long flags;
struct list_head write_list; /* nodes with pending writes */
struct list_head writequeue; /* outgoing writequeue_entries */
spinlock_t writequeue_lock;
int nodeid;
};
static DEFINE_IDR(nodeinfo_idr);
static struct rw_semaphore nodeinfo_lock;
static int max_nodeid;
struct cbuf {
unsigned base;
unsigned len;
unsigned mask;
};
/* Just the one of these, now. But this struct keeps
the connection-specific variables together */
#define CF_READ_PENDING 1
struct connection {
struct socket *sock;
unsigned long flags;
struct page *rx_page;
atomic_t waiting_requests;
struct cbuf cb;
int eagain_flag;
};
/* An entry waiting to be sent */
struct writequeue_entry {
struct list_head list;
struct page *page;
int offset;
int len;
int end;
int users;
struct nodeinfo *ni;
};
#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
#define CBUF_EMPTY(cb) ((cb)->len == 0)
#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
#define CBUF_INIT(cb, size) \
do { \
(cb)->base = (cb)->len = 0; \
(cb)->mask = ((size)-1); \
} while(0)
#define CBUF_EAT(cb, n) \
do { \
(cb)->len -= (n); \
(cb)->base += (n); \
(cb)->base &= (cb)->mask; \
} while(0)
/* List of nodes which have writes pending */
static struct list_head write_nodes;
static spinlock_t write_nodes_lock;
/* Maximum number of incoming messages to process before
* doing a schedule()
*/
#define MAX_RX_MSG_COUNT 25
/* Manage daemons */
static struct task_struct *recv_task;
static struct task_struct *send_task;
static wait_queue_head_t lowcomms_recv_wait;
static atomic_t accepting;
/* The SCTP connection */
static struct connection sctp_con;
static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
{
struct sockaddr_storage addr;
int error;
if (!dlm_local_count)
return -1;
error = dlm_nodeid_to_addr(nodeid, &addr