diff options
author | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-08-27 21:59:59 -0500 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-08-27 21:59:59 -0500 |
commit | 8ce7a9c159c8c4eb480f0a65c6af753dbf9a1a70 (patch) | |
tree | be59573c0af3617d0cd8a7d61f0ed119e58b1156 /Documentation | |
parent | d2afb3ae04e36dbc6e9eb2d8bd54406ff7b6b3bd (diff) | |
parent | 01da5fd83d6b2c5e36b77539f6cbdd8f49849225 (diff) |
Merge ../linux-2.6
Diffstat (limited to 'Documentation')
26 files changed, 952 insertions, 505 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 1ae4dc0fd85..f8fe882e33d 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -59,6 +59,9 @@ !Iinclude/linux/hrtimer.h !Ekernel/hrtimer.c </sect1> + <sect1><title>Workqueues and Kevents</title> +!Ekernel/workqueue.c + </sect1> <sect1><title>Internal Functions</title> !Ikernel/exit.c !Ikernel/signal.c @@ -300,7 +303,7 @@ X!Ekernel/module.c </sect1> <sect1><title>Resources Management</title> -!Ekernel/resource.c +!Ikernel/resource.c </sect1> <sect1><title>MTRR Handling</title> @@ -312,9 +315,7 @@ X!Ekernel/module.c !Edrivers/pci/pci-driver.c !Edrivers/pci/remove.c !Edrivers/pci/pci-acpi.c -<!-- kerneldoc does not understand __devinit -X!Edrivers/pci/search.c - --> +!Edrivers/pci/search.c !Edrivers/pci/msi.c !Edrivers/pci/bus.c <!-- FIXME: Removed for now since no structured comments in source diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index c2c85bcb3d4..2cd7f02ffd0 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -10,7 +10,9 @@ kernel, the process can sometimes be daunting if you're not familiar with "the system." This text is a collection of suggestions which can greatly increase the chances of your change being accepted. -If you are submitting a driver, also read Documentation/SubmittingDrivers. +Read Documentation/SubmitChecklist for a list of items to check +before submitting code. If you are submitting a driver, also read +Documentation/SubmittingDrivers. 
@@ -74,9 +76,6 @@ There are a number of scripts which can aid in this: Quilt: http://savannah.nongnu.org/projects/quilt -Randy Dunlap's patch scripts: -http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz - Andrew Morton's patch scripts: http://www.zip.com.au/~akpm/linux/patches/ Instead of these scripts, quilt is the recommended patch management @@ -484,7 +483,7 @@ Greg Kroah-Hartman "How to piss off a kernel subsystem maintainer". <http://www.kroah.com/log/2005/10/19/> <http://www.kroah.com/log/2006/01/11/> -NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!. +NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2> Kernel Documentation/CodingStyle @@ -493,4 +492,3 @@ Kernel Documentation/CodingStyle Linus Torvald's mail on the canonical patch format: <http://lkml.org/lkml/2005/4/7/183> -- -Last updated on 17 Nov 2005. diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt index be215e58423..1443cd71d26 100644 --- a/Documentation/accounting/delay-accounting.txt +++ b/Documentation/accounting/delay-accounting.txt @@ -64,11 +64,13 @@ Compile the kernel with CONFIG_TASK_DELAY_ACCT=y CONFIG_TASKSTATS=y -Enable the accounting at boot time by adding -the following to the kernel boot options - delayacct +Delay accounting is enabled by default at boot up. +To disable, add + nodelayacct +to the kernel boot options. The rest of the instructions +below assume this has not been done. -and after the system has booted up, use a utility +After the system has booted up, use a utility similar to getdelays.c to access the delays seen by a given task or a task group (tgid). 
The utility also allows a given command to be diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c new file mode 100644 index 00000000000..d738cde2a8d --- /dev/null +++ b/Documentation/connector/ucon.c @@ -0,0 +1,206 @@ +/* + * ucon.c + * + * Copyright (c) 2004+ Evgeniy Polyakov <johnpol@2ka.mipt.ru> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <asm/types.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/poll.h> + +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include <arpa/inet.h> + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <time.h> + +#include <linux/connector.h> + +#define DEBUG +#define NETLINK_CONNECTOR 11 + +#ifdef DEBUG +#define ulog(f, a...) fprintf(stdout, f, ##a) +#else +#define ulog(f, a...) 
do {} while (0) +#endif + +static int need_exit; +static __u32 seq; + +static int netlink_send(int s, struct cn_msg *msg) +{ + struct nlmsghdr *nlh; + unsigned int size; + int err; + char buf[128]; + struct cn_msg *m; + + size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len); + + nlh = (struct nlmsghdr *)buf; + nlh->nlmsg_seq = seq++; + nlh->nlmsg_pid = getpid(); + nlh->nlmsg_type = NLMSG_DONE; + nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh)); + nlh->nlmsg_flags = 0; + + m = NLMSG_DATA(nlh); +#if 0 + ulog("%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n", + __func__, msg->id.idx, msg->id.val, msg->len, msg->seq, msg->ack); +#endif + memcpy(m, msg, sizeof(*m) + msg->len); + + err = send(s, nlh, size, 0); + if (err == -1) + ulog("Failed to send: %s [%d].\n", + strerror(errno), errno); + + return err; +} + +int main(int argc, char *argv[]) +{ + int s; + char buf[1024]; + int len; + struct nlmsghdr *reply; + struct sockaddr_nl l_local; + struct cn_msg *data; + FILE *out; + time_t tm; + struct pollfd pfd; + + if (argc < 2) + out = stdout; + else { + out = fopen(argv[1], "a+"); + if (!out) { + ulog("Unable to open %s for writing: %s\n", + argv[1], strerror(errno)); + out = stdout; + } + } + + memset(buf, 0, sizeof(buf)); + + s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + if (s == -1) { + perror("socket"); + return -1; + } + + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 0x123; /* bitmask of requested groups */ + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { + perror("bind"); + close(s); + return -1; + } + +#if 0 + { + int on = 0x57; /* Additional group number */ + setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &on, sizeof(on)); + } +#endif + if (0) { + int i, j; + + memset(buf, 0, sizeof(buf)); + + data = (struct cn_msg *)buf; + + data->id.idx = 0x123; + data->id.val = 0x456; + data->seq = seq++; + data->ack = 0; + data->len = 0; + + for (j=0; j<10; ++j) { + for (i=0; i<1000; ++i) { + len = 
netlink_send(s, data); + } + + ulog("%d messages have been sent to %08x.%08x.\n", i, data->id.idx, data->id.val); + } + + return 0; + } + + + pfd.fd = s; + + while (!need_exit) { + pfd.events = POLLIN; + pfd.revents = 0; + switch (poll(&pfd, 1, -1)) { + case 0: + need_exit = 1; + break; + case -1: + if (errno != EINTR) { + need_exit = 1; + break; + } + continue; + } + if (need_exit) + break; + + memset(buf, 0, sizeof(buf)); + len = recv(s, buf, sizeof(buf), 0); + if (len == -1) { + perror("recv buf"); + close(s); + return -1; + } + reply = (struct nlmsghdr *)buf; + + switch (reply->nlmsg_type) { + case NLMSG_ERROR: + fprintf(out, "Error message received.\n"); + fflush(out); + break; + case NLMSG_DONE: + data = (struct cn_msg *)NLMSG_DATA(reply); + + time(&tm); + fprintf(out, "%.24s : [%x.%x] [%08u.%08u].\n", + ctime(&tm), data->id.idx, data->id.val, data->seq, data->ack); + fflush(out); + break; + default: + break; + } + } + + close(s); + return 0; +} diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 7fedc00c3d3..555c8cf3650 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -153,10 +153,13 @@ scaling_governor, and by "echoing" the name of another that some governors won't load - they only work on some specific architectures or processors. -scaling_min_freq and +scaling_min_freq and scaling_max_freq show the current "policy limits" (in kHz). By echoing new values into these files, you can change these limits. + NOTE: when setting a policy you need to + first set scaling_max_freq, then + scaling_min_freq. If you have selected the "userspace" governor which allows you to diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 1bcf69996c9..bc107cb157a 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -251,16 +251,24 @@ A: This is what you would need in your kernel code to receive notifications. 
return NOTIFY_OK; } - static struct notifier_block foobar_cpu_notifer = + static struct notifier_block __cpuinitdata foobar_cpu_notifer = { .notifier_call = foobar_cpu_callback, }; +You need to call register_cpu_notifier() from your init function. +Init functions could be of two types: +1. early init (init function called when only the boot processor is online). +2. late init (init function called _after_ all the CPUs are online). -In your init function, +For the first case, you should add the following to your init function register_cpu_notifier(&foobar_cpu_notifier); +For the second case, you should add the following to your init function + + register_hotcpu_notifier(&foobar_cpu_notifier); + You can fail PREPARE notifiers if something doesn't work to prepare resources. This will stop the activity and send a following CANCELED event back. diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 159e2a0c3e8..76b44290c15 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -217,6 +217,12 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs) to represent the cpuset hierarchy provides for a familiar permission and name space for cpusets, with a minimum of additional kernel code. +The cpus file in the root (top_cpuset) cpuset is read-only. +It automatically tracks the value of cpu_online_map, using a CPU +hotplug notifier. If and when memory nodes can be hotplugged, +we expect to make the mems file in the root cpuset read-only +as well, and have it track the value of node_online_map. + 1.4 What are exclusive cpusets ? -------------------------------- diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 4aaf68fafeb..66c725f530f 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -2565,10 +2565,10 @@ Your cooperation is appreciated. 
 243 = /dev/usb/dabusb3 Fourth dabusb device 180 block USB block devices - 0 = /dev/uba First USB block device - 8 = /dev/ubb Second USB block device - 16 = /dev/ubc Thrid USB block device - ... + 0 = /dev/uba First USB block device + 8 = /dev/ubb Second USB block device + 16 = /dev/ubc Third USB block device + ... 181 char Conrad Electronic parallel port radio clocks 0 = /dev/pcfclock0 First Conrad radio clock diff --git a/Documentation/fb/imacfb.txt b/Documentation/fb/imacfb.txt new file mode 100644 index 00000000000..759028545a7 --- /dev/null +++ b/Documentation/fb/imacfb.txt @@ -0,0 +1,31 @@ + +What is imacfb? +=============== + +This is a generic EFI platform driver for Intel based Apple computers. +Imacfb is only for EFI booted Intel Macs. + +Supported Hardware +================== + +iMac 17"/20" +Macbook +Macbook Pro 15"/17" +MacMini + +How to use it? +============== + +Imacfb does not have any kind of autodetection of your machine. +You have to add the following kernel parameters in your elilo.conf: + Macbook : + video=imacfb:macbook + MacMini : + video=imacfb:mini + Macbook Pro 15", iMac 17" : + video=imacfb:i17 + Macbook Pro 17", iMac 20" : + video=imacfb:i20 + +-- +Edgar Hucek <gimli@dark-green.com> diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 66fdc0744fe..16dec61d767 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -62,8 +62,8 @@ ramfs-rootfs-initramfs.txt - info on the 'in memory' filesystems ramfs, rootfs and initramfs. reiser4.txt - info on the Reiser4 filesystem based on dancing tree algorithms. -relayfs.txt - - info on relayfs, for efficient streaming from kernel to user space. +relay.txt + - info on relay, for efficient streaming from kernel to user space. romfs.txt - description of the ROMFS filesystem. 
smbfs.txt diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt new file mode 100644 index 00000000000..d6788dae034 --- /dev/null +++ b/Documentation/filesystems/relay.txt @@ -0,0 +1,479 @@ +relay interface (formerly relayfs) +================================== + +The relay interface provides a means for kernel applications to +efficiently log and transfer large quantities of data from the kernel +to userspace via user-defined 'relay channels'. + +A 'relay channel' is a kernel->user data relay mechanism implemented +as a set of per-cpu kernel buffers ('channel buffers'), each +represented as a regular file ('relay file') in user space. Kernel +clients write into the channel buffers using efficient write +functions; these automatically log into the current cpu's channel +buffer. User space applications mmap() or read() from the relay files +and retrieve the data as it becomes available. The relay files +themselves are files created in a host filesystem, e.g. debugfs, and +are associated with the channel buffers using the API described below. + +The format of the data logged into the channel buffers is completely +up to the kernel client; the relay interface does however provide +hooks which allow kernel clients to impose some structure on the +buffer data. The relay interface doesn't implement any form of data +filtering - this also is left to the kernel client. The purpose is to +keep things as simple as possible. + +This document provides an overview of the relay interface API. The +details of the function parameters are documented along with the +functions in the relay interface code - please see that for details. + +Semantics +========= + +Each relay channel has one buffer per CPU, each buffer has one or more +sub-buffers. Messages are written to the first sub-buffer until it is +too full to contain a new message, in which case it is written to +the next (if available). Messages are never split across sub-buffers. 
+At this point, userspace can be notified so it empties the first +sub-buffer, while the kernel continues writing to the next. + +When notified that a sub-buffer is full, the kernel knows how many +bytes of it are padding i.e. unused space occurring because a complete +message couldn't fit into a sub-buffer. Userspace can use this +knowledge to copy only valid data. + +After copying it, userspace can notify the kernel that a sub-buffer +has been consumed. + +A relay channel can operate in a mode where it will overwrite data not +yet collected by userspace, and not wait for it to be consumed. + +The relay channel itself does not provide for communication of such +data between userspace and kernel, allowing the kernel side to remain +simple and not impose a single interface on userspace. It does +provide a set of examples and a separate helper though, described +below. + +The read() interface both removes padding and internally consumes the +read sub-buffers; thus in cases where read(2) is being used to drain +the channel buffers, special-purpose communication between kernel and +user isn't necessary for basic operation. + +One of the major goals of the relay interface is to provide a low +overhead mechanism for conveying kernel data to userspace. While the +read() interface is easy to use, it's not as efficient as the mmap() +approach; the example code attempts to make the tradeoff between the +two approaches as small as possible. + +klog and relay-apps example code +================================ + +The relay interface itself is ready to use, but to make things easier, +a couple simple utility functions and a set of examples are provided. + +The relay-apps example tarball, available on the relay sourceforge +site, contains a set of self-contained examples, each consisting of a +pair of .c files containing boilerplate code for each of the user and +kernel sides of a relay application. 
When combined these two sets of +boilerplate code provide glue to easily stream data to disk, without +having to bother with mundane housekeeping chores. + +The 'klog debugging functions' patch (klog.patch in the relay-apps +tarball) provides a couple of high-level logging functions to the +kernel which allow writing formatted text or raw data to a channel, +regardless of whether a channel to write into exists or not, or even +whether the relay interface is compiled into the kernel or not. These +functions allow you to put unconditional 'trace' statements anywhere +in the kernel or kernel modules; only when there is a 'klog handler' +registered will data actually be logged (see the klog and kleak +examples for details). + +It is of course possible to use the relay interface from scratch, +i.e. without using any of the relay-apps example code or klog, but +you'll have to implement communication between userspace and kernel, +allowing both to convey the state of buffers (full, empty, amount of +padding). The read() interface both removes padding and internally +consumes the read sub-buffers; thus in cases where read(2) is being +used to drain the channel buffers, special-purpose communication +between kernel and user isn't necessary for basic operation. Things +such as buffer-full conditions would still need to be communicated via +some channel though. + +klog and the relay-apps examples can be found in the relay-apps +tarball on http://relayfs.sourceforge.net + +The relay interface user space API +================================== + +The relay interface implements basic file operations for user space +access to relay channel buffer data. Here are the file operations +that are available and some comments regarding their behavior: + +open() enables user to open an _existing_ channel buffer. + +mmap() results in channel buffer being mapped into the caller's + memory space. 
Note that you can't do a partial mmap - you + must map the entire file, which is NRBUF * SUBBUFSIZE. + +read() read the contents of a channel buffer. The bytes read are + 'consumed' by the reader, i.e. they won't be available + again to subsequent reads. If the channel is being used + in no-overwrite mode (the default), it can be read at any + time even if there's an active kernel writer. If the + channel is being used in overwrite mode and there are + active channel writers, results may be unpredictable - + users should make sure that all logging to the channel has + ended before using read() with overwrite mode. Sub-buffer + padding is automatically removed and will not be seen by + the reader. + +sendfile() transfer data from a channel buffer to an output file + descriptor. Sub-buffer padding is automatically removed + and will not be seen by the reader. + +poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are + notified when sub-buffer boundaries are crossed. + +close() decrements the channel buffer's refcount. When the refcount + reaches 0, i.e. when no process or kernel client has the + buffer open, the channel buffer is freed. + +In order for a user application to make use of relay files, the +host filesystem must be mounted. For example, + + mount -t debugfs debugfs /debug + +NOTE: the host filesystem doesn't need to be mounted for kernel + clients to create or use channels - it only needs to be + mounted when user space applications need access to the buffer + data. + + +The relay interface kernel API +============================== + +Here's a summary of the API the relay interface provides to in-kernel clients: + +TBD(curr. 
line MT:/API/) + channel management functions: + + relay_open(base_filename, parent, subbuf_size, n_subbufs, + callbacks) + relay_close(chan) + relay_flush(chan) + relay_reset(chan) + + channel management typically called on instigation of userspace: + + relay_subbufs_consumed(chan, cpu, subbufs_consumed) + + write functions: + + relay_write(chan, data, length) + __relay_write(chan, data, length) + relay_reserve(chan, length) + + callbacks: + + subbuf_start(buf, subbuf, prev_subbuf, prev_padding) + buf_mapped(buf, filp) + buf_unmapped(buf, filp) + create_buf_file(filename, parent, mode, buf, is_global) + remove_buf_file(dentry) + + helper functions: + + relay_buf_full(buf) + subbuf_start_reserve(buf, length) + + +Creating a channel +------------------ + +relay_open() is used to create a channel, along with its per-cpu +channel buffers. Each channel buffer will have an associated file +created for it in the host filesystem, which can be mmapped or +read from in user space. The files are named basename0...basenameN-1 +where N is the number of online cpus, and by default will be created +in the root of the filesystem (if the parent param is NULL). If you +want a directory structure to contain your relay files, you should +create it using the host filesystem's directory creation function, +e.g. debugfs_create_dir(), and pass the parent directory to +relay_open(). Users are responsible for cleaning up any directory +structure they create, when the channel is closed - again the host +filesystem's directory removal functions should be used for that, +e.g. debugfs_remove(). + +In order for a channel to be created and the host filesystem's files +associated with its channel buffers, the user must provide definitions +for two callback functions, create_buf_file() and remove_buf_file(). +create_buf_file() is called once for each per-cpu buffer from +relay_open() and allows the user to create the file which will be used +to represent the corresponding channel buffer. 
The callback should +return the dentry of the file created to represent the channel buffer. +remove_buf_file() must also be defined; it's responsible for deleting +the file(s) created in create_buf_file() and is called during +relay_close(). + +Here are some typical definitions for these callbacks, in this case +using debugfs: + +/* + * create_buf_file() callback. Creates relay file in debugfs. + */ +static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); +} + +/* + * remove_buf_file() callback. Removes relay file from debugfs. + */ +static int remove_buf_file_handler(struct dentry *dentry) +{ + debugfs_remove(dentry); + + return 0; +} + +/* + * relay interface callbacks + */ +static struct rchan_callbacks relay_callbacks = +{ + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, +}; + +And an example relay_open() invocation using them: + + chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks); + +If the create_buf_file() callback fails, or isn't defined, channel +creation and thus relay_open() will fail. + +The total size of each per-cpu buffer is calculated by multiplying the +number of sub-buffers by the sub-buffer size passed into relay_open(). +The idea behind sub-buffers is that they're basically an extension of +double-buffering to N buffers, and they also allow applications to +easily implement random-access-on-buffer-boundary schemes, which can +be important for some high-volume applications. The number and size +of sub-buffers is completely dependent on the application and even for +the same application, different conditions will warrant different +values for these parameters at different times. 
Typically, the right +values to use are best decided after some experimentation; in general, +though, it's safe to assume that having only 1 sub-buffer is a bad +idea - you're guaranteed to either overwrite data or lose events +depending on the channel mode being used. + +The create_buf_file() implementation can also be defined in such a way +as to allow the creation of a single 'global' buffer instead of the +default per-cpu set. This can be useful for applications interested +mainly in seeing the relative ordering of system-wide events without +the need to bother with saving explicit timestamps for the purpose of +merging/sorting per-cpu files in a postprocessing step. + +To have relay_open() create a global buffer, the create_buf_file() +implementation should set the value of the is_global outparam to a +non-zero value in addition to creating the file that will be used to +represent the single buffer. In the case of a global buffer, +create_buf_file() and remove_buf_file() will be called only once. The +normal channel-writing functions, e.g. relay_write(), can still be +used - writes from any cpu will transparently end up in the global +buffer - but since it is a global buffer, callers should make sure +they use the proper locking for such a buffer, either by wrapping +writes in a spinlock, or by copying a write function from relay.h and +creating a local version that internally does the proper locking. + +Channel 'modes' +--------------- + +relay channels can be used in either of two modes - 'overwrite' or +'no-overwrite'. The mode is entirely determined by the implementation +of the subbuf_start() callback, as described below. The default if no +subbuf_start() callback is defined is 'no-overwrite' mode. If the +default mode suits your needs, and you plan to use the read() +interface to retrieve channel data, you can ignore the details of this +section, as it pertains mainly to mmap() implementations. 
+In 'overwrite' mode, also known as 'flight recorder' mode, writes +continuously cycle around the buffer and will never fail, but will +unconditionally overwrite old data regardless of whether it's actually +been consumed. In no-overwrite mode, writes will fail, i.e. data will +be lost, if the number of unconsumed sub-buffers equals the total +number of sub-buffers in the channel. It should be clear that if +there is no consumer or if the consumer can't consume sub-buffers fast +enough, data will be lost in either case; the only difference is +whether data is lost from the beginning or the end of a buffer. + +As explained above, a relay channel is made up of one or more +per-cpu channel buffers, each implemented as a circular buffer +subdivided into one or more sub-buffers. Messages are written into +the current sub-buffer of the channel's current per-cpu buffer via the +write functions described below. Whenever a message can't fit into +the current sub-buffer, because there's no room left for it, the +client is notified via the subbuf_start() callback that a switch to a +new sub-buffer is about to occur. The client uses this callback to 1) +initialize the next sub-buffer if appropriate 2) finalize the previous +sub-buffer if appropriate and 3) return a boolean value indicating +whether or not to actually move on to the next sub-buffer. + +To implement 'no-overwrite' mode, the userspace client would provide +an implementation of the subbuf_start() callback something like the +following: + +static int subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + unsigned int prev_padding) +{ + if (prev_subbuf) + *((unsigned *)prev_subbuf) = prev_padding; + + if (relay_buf_full(buf)) + return 0; + + subbuf_start_reserve(buf, sizeof(unsigned int)); + + return 1; +} + |