Merge ../linux-2.6

author: James Bottomley <jejb@mulgrave.il.steeleye.com> 2006-08-27 21:59:59 -0500
committer: James Bottomley <jejb@mulgrave.il.steeleye.com> 2006-08-27 21:59:59 -0500
commit: 8ce7a9c159c8c4eb480f0a65c6af753dbf9a1a70 (patch)
tree: be59573c0af3617d0cd8a7d61f0ed119e58b1156 /Documentation
parent: d2afb3ae04e36dbc6e9eb2d8bd54406ff7b6b3bd (diff)
parent: 01da5fd83d6b2c5e36b77539f6cbdd8f49849225 (diff)
26 files changed, 952 insertions, 505 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 1ae4dc0fd85..f8fe882e33d 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -59,6 +59,9 @@
 !Iinclude/linux/hrtimer.h
 !Ekernel/hrtimer.c
      </sect1>
+     <sect1><title>Workqueues and Kevents</title>
+!Ekernel/workqueue.c
+     </sect1>
      <sect1><title>Internal Functions</title>
 !Ikernel/exit.c
 !Ikernel/signal.c
@@ -300,7 +303,7 @@ X!Ekernel/module.c
      </sect1>
 
      <sect1><title>Resources Management</title>
-!Ekernel/resource.c
+!Ikernel/resource.c
      </sect1>
 
      <sect1><title>MTRR Handling</title>
@@ -312,9 +315,7 @@ X!Ekernel/module.c
 !Edrivers/pci/pci-driver.c
 !Edrivers/pci/remove.c
 !Edrivers/pci/pci-acpi.c
-<!-- kerneldoc does not understand __devinit
-X!Edrivers/pci/search.c
- -->
+!Edrivers/pci/search.c
 !Edrivers/pci/msi.c
 !Edrivers/pci/bus.c
 <!-- FIXME: Removed for now since no structured comments in source
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index c2c85bcb3d4..2cd7f02ffd0 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -10,7 +10,9 @@ kernel, the process can sometimes be daunting if you're not familiar
 with "the system."  This text is a collection of suggestions which
 can greatly increase the chances of your change being accepted.
 
-If you are submitting a driver, also read Documentation/SubmittingDrivers.
+Read Documentation/SubmitChecklist for a list of items to check
+before submitting code.  If you are submitting a driver, also read
+Documentation/SubmittingDrivers.
 
 
 
@@ -74,9 +76,6 @@ There are a number of scripts which can aid in this:
 Quilt:
 http://savannah.nongnu.org/projects/quilt
 
-Randy Dunlap's patch scripts:
-http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz
-
 Andrew Morton's patch scripts:
 http://www.zip.com.au/~akpm/linux/patches/
 Instead of these scripts, quilt is the recommended patch management
@@ -484,7 +483,7 @@ Greg Kroah-Hartman "How to piss off a kernel subsystem maintainer".
   <http://www.kroah.com/log/2005/10/19/>
   <http://www.kroah.com/log/2006/01/11/>
 
-NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!.
+NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2>
 
 Kernel Documentation/CodingStyle
@@ -493,4 +492,3 @@ Kernel Documentation/CodingStyle
 Linus Torvald's mail on the canonical patch format:
   <http://lkml.org/lkml/2005/4/7/183>
 --
-Last updated on 17 Nov 2005.
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
index be215e58423..1443cd71d26 100644
--- a/Documentation/accounting/delay-accounting.txt
+++ b/Documentation/accounting/delay-accounting.txt
@@ -64,11 +64,13 @@ Compile the kernel with
 	CONFIG_TASK_DELAY_ACCT=y
 	CONFIG_TASKSTATS=y
 
-Enable the accounting at boot time by adding
-the following to the kernel boot options
-	delayacct
+Delay accounting is enabled by default at boot up.
+To disable, add
+   nodelayacct
+to the kernel boot options. The rest of the instructions
+below assume this has not been done.
 
-and after the system has booted up, use a utility
+After the system has booted up, use a utility
 similar to  getdelays.c to access the delays
 seen by a given task or a task group (tgid).
 The utility also allows a given command to be
diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c
new file mode 100644
index 00000000000..d738cde2a8d
--- /dev/null
+++ b/Documentation/connector/ucon.c
@@ -0,0 +1,206 @@
+/*
+ * 	ucon.c
+ *
+ * Copyright (c) 2004+ Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <asm/types.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+
+#include <linux/connector.h>
+
+#define DEBUG
+#define NETLINK_CONNECTOR 	11
+
+#ifdef DEBUG
+#define ulog(f, a...) fprintf(stdout, f, ##a)
+#else
+#define ulog(f, a...) do {} while (0)
+#endif
+
+static int need_exit;
+static __u32 seq;
+
+static int netlink_send(int s, struct cn_msg *msg)
+{
+	struct nlmsghdr *nlh;
+	unsigned int size;
+	int err;
+	char buf[128];
+	struct cn_msg *m;
+
+	size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len);
+
+	nlh = (struct nlmsghdr *)buf;
+	nlh->nlmsg_seq = seq++;
+	nlh->nlmsg_pid = getpid();
+	nlh->nlmsg_type = NLMSG_DONE;
+	nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh));
+	nlh->nlmsg_flags = 0;
+
+	m = NLMSG_DATA(nlh);
+#if 0
+	ulog("%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n",
+	       __func__, msg->id.idx, msg->id.val, msg->len, msg->seq, msg->ack);
+#endif
+	memcpy(m, msg, sizeof(*m) + msg->len);
+
+	err = send(s, nlh, size, 0);
+	if (err == -1)
+		ulog("Failed to send: %s [%d].\n",
+			strerror(errno), errno);
+
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int s;
+	char buf[1024];
+	int len;
+	struct nlmsghdr *reply;
+	struct sockaddr_nl l_local;
+	struct cn_msg *data;
+	FILE *out;
+	time_t tm;
+	struct pollfd pfd;
+
+	if (argc < 2)
+		out = stdout;
+	else {
+		out = fopen(argv[1], "a+");
+		if (!out) {
+			ulog("Unable to open %s for writing: %s\n",
+				argv[1], strerror(errno));
+			out = stdout;
+		}
+	}
+
+	memset(buf, 0, sizeof(buf));
+
+	s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+	if (s == -1) {
+		perror("socket");
+		return -1;
+	}
+
+	l_local.nl_family = AF_NETLINK;
+	l_local.nl_groups = 0x123; /* bitmask of requested groups */
+	l_local.nl_pid = 0;
+
+	if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
+		perror("bind");
+		close(s);
+		return -1;
+	}
+
+#if 0
+	{
+		int on = 0x57; /* Additional group number */
+		setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &on, sizeof(on));
+	}
+#endif
+	if (0) {
+		int i, j;
+
+		memset(buf, 0, sizeof(buf));
+
+		data = (struct cn_msg *)buf;
+
+		data->id.idx = 0x123;
+		data->id.val = 0x456;
+		data->seq = seq++;
+		data->ack = 0;
+		data->len = 0;
+
+		for (j=0; j<10; ++j) {
+			for (i=0; i<1000; ++i) {
+				len = netlink_send(s, data);
+			}
+
+			ulog("%d messages have been sent to %08x.%08x.\n", i, data->id.idx, data->id.val);
+		}
+
+		return 0;
+	}
+
+
+	pfd.fd = s;
+
+	while (!need_exit) {
+		pfd.events = POLLIN;
+		pfd.revents = 0;
+		switch (poll(&pfd, 1, -1)) {
+			case 0:
+				need_exit = 1;
+				break;
+			case -1:
+				if (errno != EINTR) {
+					need_exit = 1;
+					break;
+				}
+				continue;
+		}
+		if (need_exit)
+			break;
+
+		memset(buf, 0, sizeof(buf));
+		len = recv(s, buf, sizeof(buf), 0);
+		if (len == -1) {
+			perror("recv buf");
+			close(s);
+			return -1;
+		}
+		reply = (struct nlmsghdr *)buf;
+
+		switch (reply->nlmsg_type) {
+		case NLMSG_ERROR:
+			fprintf(out, "Error message received.\n");
+			fflush(out);
+			break;
+		case NLMSG_DONE:
+			data = (struct cn_msg *)NLMSG_DATA(reply);
+
+			time(&tm);
+			fprintf(out, "%.24s : [%x.%x] [%08u.%08u].\n",
+				ctime(&tm), data->id.idx, data->id.val, data->seq, data->ack);
+			fflush(out);
+			break;
+		default:
+			break;
+		}
+	}
+
+	close(s);
+	return 0;
+}
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index 7fedc00c3d3..555c8cf3650 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -153,10 +153,13 @@ scaling_governor,		and by "echoing" the name of another
 				that some governors won't load - they only
 				work on some specific architectures or
 				processors.
-scaling_min_freq and 
+scaling_min_freq and
 scaling_max_freq		show the current "policy limits" (in
 				kHz). By echoing new values into these
 				files, you can change these limits.
+				NOTE: when setting a policy you need to
+				first set scaling_max_freq, then
+				scaling_min_freq.
 
 
 If you have selected the "userspace" governor which allows you to
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 1bcf69996c9..bc107cb157a 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -251,16 +251,24 @@ A: This is what you would need in your kernel code to receive notifications.
 		return NOTIFY_OK;
 	}
 
-	static struct notifier_block foobar_cpu_notifer =
+	static struct notifier_block __cpuinitdata foobar_cpu_notifier =
 	{
 	   .notifier_call = foobar_cpu_callback,
 	};
 
+You need to call register_cpu_notifier() from your init function.
+Init functions could be of two types:
+1. early init (init function called when only the boot processor is online).
+2. late init (init function called _after_ all the CPUs are online).
 
-In your init function,
+For the first case, you should add the following to your init function
 
 	register_cpu_notifier(&foobar_cpu_notifier);
 
+For the second case, you should add the following to your init function
+
+	register_hotcpu_notifier(&foobar_cpu_notifier);
+
 You can fail PREPARE notifiers if something doesn't work to prepare resources.
 This will stop the activity and send a following CANCELED event back.
 
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 159e2a0c3e8..76b44290c15 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -217,6 +217,12 @@ exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
 to represent the cpuset hierarchy provides for a familiar permission
 and name space for cpusets, with a minimum of additional kernel code.
 
+The cpus file in the root (top_cpuset) cpuset is read-only.
+It automatically tracks the value of cpu_online_map, using a CPU
+hotplug notifier.  If and when memory nodes can be hotplugged,
+we expect to make the mems file in the root cpuset read-only
+as well, and have it track the value of node_online_map.
+
 
 1.4 What are exclusive cpusets ?
 --------------------------------
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 4aaf68fafeb..66c725f530f 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -2565,10 +2565,10 @@ Your cooperation is appreciated.
 		243 = /dev/usb/dabusb3	Fourth dabusb device
 
 180 block	USB block devices
-		0 = /dev/uba		First USB block device
-		8 = /dev/ubb		Second USB block device
-		16 = /dev/ubc		Thrid USB block device
-		...
+		  0 = /dev/uba		First USB block device
+		  8 = /dev/ubb		Second USB block device
+		 16 = /dev/ubc		Third USB block device
+		    ...
 
 181 char	Conrad Electronic parallel port radio clocks
 		  0 = /dev/pcfclock0	First Conrad radio clock
diff --git a/Documentation/fb/imacfb.txt b/Documentation/fb/imacfb.txt
new file mode 100644
index 00000000000..759028545a7
--- /dev/null
+++ b/Documentation/fb/imacfb.txt
@@ -0,0 +1,31 @@
+
+What is imacfb?
+===============
+
+This is a generic EFI platform driver for Intel based Apple computers.
+Imacfb is only for EFI booted Intel Macs.
+
+Supported Hardware
+==================
+
+iMac 17"/20"
+Macbook
+Macbook Pro 15"/17"
+MacMini
+
+How to use it?
+==============
+
+Imacfb does not have any kind of autodetection of your machine.
+You have to add the following kernel parameters in your elilo.conf:
+	Macbook :
+		video=imacfb:macbook
+	MacMini :
+		video=imacfb:mini
+	Macbook Pro 15", iMac 17" :
+		video=imacfb:i17
+	Macbook Pro 17", iMac 20" :
+		video=imacfb:i20
+
+--
+Edgar Hucek <gimli@dark-green.com>
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 66fdc0744fe..16dec61d767 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -62,8 +62,8 @@ ramfs-rootfs-initramfs.txt
 	- info on the 'in memory' filesystems ramfs, rootfs and initramfs.
 reiser4.txt
 	- info on the Reiser4 filesystem based on dancing tree algorithms.
-relayfs.txt
-	- info on relayfs, for efficient streaming from kernel to user space.
+relay.txt
+	- info on relay, for efficient streaming from kernel to user space.
 romfs.txt
 	- description of the ROMFS filesystem.
 smbfs.txt
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt
new file mode 100644
index 00000000000..d6788dae034
--- /dev/null
+++ b/Documentation/filesystems/relay.txt
@@ -0,0 +1,479 @@
+relay interface (formerly relayfs)
+==================================
+
+The relay interface provides a means for kernel applications to
+efficiently log and transfer large quantities of data from the kernel
+to userspace via user-defined 'relay channels'.
+
+A 'relay channel' is a kernel->user data relay mechanism implemented
+as a set of per-cpu kernel buffers ('channel buffers'), each
+represented as a regular file ('relay file') in user space.  Kernel
+clients write into the channel buffers using efficient write
+functions; these automatically log into the current cpu's channel
+buffer.  User space applications mmap() or read() from the relay files
+and retrieve the data as it becomes available.  The relay files
+themselves are files created in a host filesystem, e.g. debugfs, and
+are associated with the channel buffers using the API described below.
+
+The format of the data logged into the channel buffers is completely
+up to the kernel client; the relay interface does however provide
+hooks which allow kernel clients to impose some structure on the
+buffer data.  The relay interface doesn't implement any form of data
+filtering - this also is left to the kernel client.  The purpose is to
+keep things as simple as possible.
+
+This document provides an overview of the relay interface API.  The
+details of the function parameters are documented along with the
+functions in the relay interface code - please see that for details.
+
+Semantics
+=========
+
+Each relay channel has one buffer per CPU, each buffer has one or more
+sub-buffers.  Messages are written to the first sub-buffer until it is
+too full to contain a new message, in which case it is written to
+the next (if available).  Messages are never split across sub-buffers.
+At this point, userspace can be notified so it empties the first
+sub-buffer, while the kernel continues writing to the next.
+
+When notified that a sub-buffer is full, the kernel knows how many
+bytes of it are padding i.e. unused space occurring because a complete
+message couldn't fit into a sub-buffer.  Userspace can use this
+knowledge to copy only valid data.
+
+After copying it, userspace can notify the kernel that a sub-buffer
+has been consumed.
+
+A relay channel can operate in a mode where it will overwrite data not
+yet collected by userspace, and not wait for it to be consumed.
+
+The relay channel itself does not provide for communication of such
+data between userspace and kernel, allowing the kernel side to remain
+simple and not impose a single interface on userspace.  It does
+provide a set of examples and a separate helper though, described
+below.
+
+The read() interface both removes padding and internally consumes the
+read sub-buffers; thus in cases where read(2) is being used to drain
+the channel buffers, special-purpose communication between kernel and
+user isn't necessary for basic operation.
+
+One of the major goals of the relay interface is to provide a low
+overhead mechanism for conveying kernel data to userspace.  While the
+read() interface is easy to use, it's not as efficient as the mmap()
+approach; the example code attempts to make the tradeoff between the
+two approaches as small as possible.
+
+klog and relay-apps example code
+================================
+
+The relay interface itself is ready to use, but to make things easier,
+a couple of simple utility functions and a set of examples are provided.
+
+The relay-apps example tarball, available on the relay sourceforge
+site, contains a set of self-contained examples, each consisting of a
+pair of .c files containing boilerplate code for each of the user and
+kernel sides of a relay application.  When combined these two sets of
+boilerplate code provide glue to easily stream data to disk, without
+having to bother with mundane housekeeping chores.
+
+The 'klog debugging functions' patch (klog.patch in the relay-apps
+tarball) provides a couple of high-level logging functions to the
+kernel which allow writing formatted text or raw data to a channel,
+regardless of whether a channel to write into exists or not, or even
+whether the relay interface is compiled into the kernel or not.  These
+functions allow you to put unconditional 'trace' statements anywhere
+in the kernel or kernel modules; only when there is a 'klog handler'
+registered will data actually be logged (see the klog and kleak
+examples for details).
+
+It is of course possible to use the relay interface from scratch,
+i.e. without using any of the relay-apps example code or klog, but
+you'll have to implement communication between userspace and kernel,
+allowing both to convey the state of buffers (full, empty, amount of
+padding).  The read() interface both removes padding and internally
+consumes the read sub-buffers; thus in cases where read(2) is being
+used to drain the channel buffers, special-purpose communication
+between kernel and user isn't necessary for basic operation.  Things
+such as buffer-full conditions would still need to be communicated via
+some channel though.
+
+klog and the relay-apps examples can be found in the relay-apps
+tarball on http://relayfs.sourceforge.net
+
+The relay interface user space API
+==================================
+
+The relay interface implements basic file operations for user space
+access to relay channel buffer data.  Here are the file operations
+that are available and some comments regarding their behavior:
+
+open()	    enables user to open an _existing_ channel buffer.
+
+mmap()      results in channel buffer being mapped into the caller's
+	    memory space. Note that you can't do a partial mmap - you
+	    must map the entire file, which is NRBUF * SUBBUFSIZE.
+
+read()      read the contents of a channel buffer.  The bytes read are
+	    'consumed' by the reader, i.e. they won't be available
+	    again to subsequent reads.  If the channel is being used
+	    in no-overwrite mode (the default), it can be read at any
+	    time even if there's an active kernel writer.  If the
+	    channel is being used in overwrite mode and there are
+	    active channel writers, results may be unpredictable -
+	    users should make sure that all logging to the channel has
+	    ended before using read() with overwrite mode.  Sub-buffer
+	    padding is automatically removed and will not be seen by
+	    the reader.
+
+sendfile()  transfer data from a channel buffer to an output file
+	    descriptor. Sub-buffer padding is automatically removed
+	    and will not be seen by the reader.
+
+poll()      POLLIN/POLLRDNORM/POLLERR supported.  User applications are
+	    notified when sub-buffer boundaries are crossed.
+
+close()     decrements the channel buffer's refcount.  When the refcount
+	    reaches 0, i.e. when no process or kernel client has the
+	    buffer open, the channel buffer is freed.
+
+In order for a user application to make use of relay files, the
+host filesystem must be mounted.  For example,
+
+	mount -t debugfs debugfs /debug
+
+NOTE:   the host filesystem doesn't need to be mounted for kernel
+	clients to create or use channels - it only needs to be
+	mounted when user space applications need access to the buffer
+	data.
+
+
+The relay interface kernel API
+==============================
+
+Here's a summary of the API the relay interface provides to in-kernel clients:
+
+TBD(curr. line MT:/API/)
+  channel management functions:
+
+    relay_open(base_filename, parent, subbuf_size, n_subbufs,
+               callbacks)
+    relay_close(chan)
+    relay_flush(chan)
+    relay_reset(chan)
+
+  channel management typically called on instigation of userspace:
+
+    relay_subbufs_consumed(chan, cpu, subbufs_consumed)
+
+  write functions:
+
+    relay_write(chan, data, length)
+    __relay_write(chan, data, length)
+    relay_reserve(chan, length)
+
+  callbacks:
+
+    subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
+    buf_mapped(buf, filp)
+    buf_unmapped(buf, filp)
+    create_buf_file(filename, parent, mode, buf, is_global)
+    remove_buf_file(dentry)
+
+  helper functions:
+
+    relay_buf_full(buf)
+    subbuf_start_reserve(buf, length)
+
+
+Creating a channel
+------------------
+
+relay_open() is used to create a channel, along with its per-cpu
+channel buffers.  Each channel buffer will have an associated file
+created for it in the host filesystem, which can be mmapped or
+read from in user space.  The files are named basename0...basenameN-1
+where N is the number of online cpus, and by default will be created
+in the root of the filesystem (if the parent param is NULL).  If you
+want a directory structure to contain your relay files, you should
+create it using the host filesystem's directory creation function,
+e.g. debugfs_create_dir(), and pass the parent directory to
+relay_open().  Users are responsible for cleaning up any directory
+structure they create, when the channel is closed - again the host
+filesystem's directory removal functions should be used for that,
+e.g. debugfs_remove().
+
+In order for a channel to be created and the host filesystem's files
+associated with its channel buffers, the user must provide definitions
+for two callback functions, create_buf_file() and remove_buf_file().
+create_buf_file() is called once for each per-cpu buffer from
+relay_open() and allows the user to create the file which will be used
+to represent the corresponding channel buffer.  The callback should
+return the dentry of the file created to represent the channel buffer.
+remove_buf_file() must also be defined; it's responsible for deleting
+the file(s) created in create_buf_file() and is called during
+relay_close().
+
+Here are some typical definitions for these callbacks, in this case
+using debugfs:
+
+/*
+ * create_buf_file() callback.  Creates relay file in debugfs.
+ */
+static struct dentry *create_buf_file_handler(const char *filename,
+                                              struct dentry *parent,
+                                              int mode,
+                                              struct rchan_buf *buf,
+                                              int *is_global)
+{
+        return debugfs_create_file(filename, mode, parent, buf,
+	                           &relay_file_operations);
+}
+
+/*
+ * remove_buf_file() callback.  Removes relay file from debugfs.
+ */
+static int remove_buf_file_handler(struct dentry *dentry)
+{
+        debugfs_remove(dentry);
+
+        return 0;
+}
+
+/*
+ * relay interface callbacks
+ */
+static struct rchan_callbacks relay_callbacks =
+{
+        .create_buf_file = create_buf_file_handler,
+        .remove_buf_file = remove_buf_file_handler,
+};
+
+And an example relay_open() invocation using them:
+
+  chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks);
+
+If the create_buf_file() callback fails, or isn't defined, channel
+creation and thus relay_open() will fail.
+
+The total size of each per-cpu buffer is calculated by multiplying the
+number of sub-buffers by the sub-buffer size passed into relay_open().
+The idea behind sub-buffers is that they're basically an extension of
+double-buffering to N buffers, and they also allow applications to
+easily implement random-access-on-buffer-boundary schemes, which can
+be important for some high-volume applications.  The number and size
+of sub-buffers is completely dependent on the application and even for
+the same application, different conditions will warrant different
+values for these parameters at different times.  Typically, the right
+values to use are best decided after some experimentation; in general,
+though, it's safe to assume that having only 1 sub-buffer is a bad
+idea - you're guaranteed to either overwrite data or lose events
+depending on the channel mode being used.
+
+The create_buf_file() implementation can also be defined in such a way
+as to allow the creation of a single 'global' buffer instead of the
+default per-cpu set.  This can be useful for applications interested
+mainly in seeing the relative ordering of system-wide events without
+the need to bother with saving explicit timestamps for the purpose of
+merging/sorting per-cpu files in a postprocessing step.
+
+To have relay_open() create a global buffer, the create_buf_file()
+implementation should set the value of the is_global outparam to a
+non-zero value in addition to creating the file that will be used to
+represent the single buffer.  In the case of a global buffer,
+create_buf_file() and remove_buf_file() will be called only once.  The
+normal channel-writing functions, e.g. relay_write(), can still be
+used - writes from any cpu will transparently end up in the global
+buffer - but since it is a global buffer, callers should make sure
+they use the proper locking for such a buffer, either by wrapping
+writes in a spinlock, or by copying a write function from relay.h and
+creating a local version that internally does the proper locking.
+
+Channel 'modes'
+---------------
+
+relay channels can be used in either of two modes - 'overwrite' or
+'no-overwrite'.  The mode is entirely determined by the implementation
+of the subbuf_start() callback, as described below.  The default if no
+subbuf_start() callback is defined is 'no-overwrite' mode.  If the
+default mode suits your needs, and you plan to use the read()
+interface to retrieve channel data, you can ignore the details of this
+section, as it pertains mainly to mmap() implementations.
+
+In 'overwrite' mode, also known as 'flight recorder' mode, writes
+continuously cycle around the buffer and will never fail, but will
+unconditionally overwrite old data regardless of whether it's actually
+been consumed.  In no-overwrite mode, writes will fail, i.e. data will
+be lost, if the number of unconsumed sub-buffers equals the total
+number of sub-buffers in the channel.  It should be clear that if
+there is no consumer or if the consumer can't consume sub-buffers fast
+enough, data will be lost in either case; the only difference is
+whether data is lost from the beginning or the end of a buffer.
+
+As explained above, a relay channel is made up of one or more
+per-cpu channel buffers, each implemented as a circular buffer
+subdivided into one or more sub-buffers.  Messages are written into
+the current sub-buffer of the channel's current per-cpu buffer via the
+write functions described below.  Whenever a message can't fit into
+the current sub-buffer, because there's no room left for it, the
+client is notified via the subbuf_start() callback that a switch to a
+new sub-buffer is about to occur.  The client uses this callback to 1)
+initialize the next sub-buffer if appropriate 2) finalize the previous
+sub-buffer if appropriate and 3) return a boolean value indicating
+whether or not to actually move on to the next sub-buffer.
+
+To implement 'no-overwrite' mode, the kernel client would provide
+an implementation of the subbuf_start() callback something like the
+following:
+
+static int subbuf_start(struct rchan_buf *buf,
+                        void *subbuf,
+			void *prev_subbuf,
+			unsigned int prev_padding)
+{
+	if (prev_subbuf)
+		*((unsigned *)prev_subbuf) = prev_padding;
+
+	if (relay_buf_full(buf))
+		return 0;
+
+	subbuf_start_reserve(buf, sizeof(unsigned int));
+
+	retur
author	James Bottomley <jejb@mulgrave.il.steeleye.com>	2006-08-27 21:59:59 -0500
committer	James Bottomley <jejb@mulgrave.il.steeleye.com>	2006-08-27 21:59:59 -0500
commit	8ce7a9c159c8c4eb480f0a65c6af753dbf9a1a70 (patch)
tree	be59573c0af3617d0cd8a7d61f0ed119e58b1156 /Documentation
parent	d2afb3ae04e36dbc6e9eb2d8bd54406ff7b6b3bd (diff)
parent	01da5fd83d6b2c5e36b77539f6cbdd8f49849225 (diff)