/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
/*
* lowcomms.c
*
* This is the "low-level" comms layer.
*
* It is responsible for sending/receiving messages
* from other nodes in the cluster.
*
* Cluster nodes are referred to by their nodeids. nodeids are
* simply 32 bit numbers to the locking module - if they need to
* be expanded for the cluster infrastructure then that is it's
* responsibility. It is this layer's
* responsibility to resolve these into IP address or
* whatever it needs for inter-node communication.
*
* The comms level is two kernel threads that deal mainly with
* the receiving of messages from other nodes and passing them
* up to the mid-level comms layer (which understands the
* message format) for execution by the locking core, and
* a send thread which does all the setting up of connections
* to remote nodes and the sending of data. Threads are not allowed
* to send their own data because it may cause them to wait in times
* of high load. Also, this way, the sending thread can collect together
* messages bound for one node and send them in one block.
*
* I don't see any problem with the recv thread executing the locking
* code on behalf of remote processes as the locking code is
* short, efficient and never (well, hardly ever) waits.
*
*/
#include <asm/ioctls.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/sctp/user.h>
#include <linux/pagemap.h>
#include <linux/socket.h>
#include <linux/idr.h>
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "midcomms.h"
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count;
static int dlm_local_nodeid;
/* One of these per connected node */
#define NI_INIT_PENDING 1
#define NI_WRITE_PENDING 2
struct nodeinfo {
spinlock_t lock;
sctp_assoc_t assoc_id;
unsigned long flags;
struct list_head write_list; /* nodes with pending writes */
struct list_head writequeue; /* outgoing writequeue_entries */
spinlock_t writequeue_lock;
int nodeid;
};
static DEFINE_IDR(nodeinfo_idr);
static DECLARE_RWSEM(nodeinfo_lock);
static int max_nodeid;
struct cbuf {
unsigned int base;
unsigned int len;
unsigned int mask;
};
/* Just the one of these, now. But this struct keeps
the connection-specific variables together */
#define CF_READ_PENDING 1
struct connection {
struct socket *sock;
unsigned long flags;
struct page *rx_page;
atomic_t waiting_requests;
struct cbuf cb;
int eagain_flag;
};
/* An entry waiting to be sent */
struct writequeue_entry {
struct list_head list;
struct page *page;
int offset;
int len;
int end;
int users;
struct nodeinfo *ni;
};
static void cbuf_add(struct cbuf *cb, int n)
{
cb->len += n;
}
static int cbuf_data(struct cbuf *cb)
{
return ((cb->base + cb->len) & cb->mask);
}
static void cbuf_init(struct cbuf *cb, int size)
{
cb->base = cb->len = 0;
cb->mask = size-1;
}
static void cbuf_eat(struct cbuf *cb, int n)
{
cb->len -= n;
cb->base += n;
cb->base &= cb->mask;
}
/* List of nodes which have writes pending */
static LIST_HEAD(write_nodes);
static DEFINE_SPINLOCK(write_nodes_lock