diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/00-INDEX | 4 | ||||
-rw-r--r-- | Documentation/SubmittingPatches | 56 | ||||
-rw-r--r-- | Documentation/filesystems/00-INDEX | 6 | ||||
-rw-r--r-- | Documentation/filesystems/nfsroot.txt (renamed from Documentation/nfsroot.txt) | 0 | ||||
-rw-r--r-- | Documentation/filesystems/rpc-cache.txt (renamed from Documentation/rpc-cache.txt) | 0 | ||||
-rw-r--r-- | Documentation/filesystems/seq_file.txt | 283 | ||||
-rw-r--r-- | Documentation/hrtimers/highres.txt | 2 | ||||
-rw-r--r-- | Documentation/i386/IO-APIC.txt | 2 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 15 | ||||
-rw-r--r-- | Documentation/lguest/lguest.c | 70 | ||||
-rw-r--r-- | Documentation/lguest/lguest.txt | 19 | ||||
-rw-r--r-- | Documentation/networking/00-INDEX | 3 | ||||
-rw-r--r-- | Documentation/networking/sk98lin.txt | 568 | ||||
-rw-r--r-- | Documentation/nmi_watchdog.txt | 3 | ||||
-rw-r--r-- | Documentation/scheduler/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/scheduler/sched-rt-group.txt (renamed from Documentation/sched-rt-group.txt) | 0 | ||||
-rw-r--r-- | Documentation/spi/spi-summary | 15 | ||||
-rw-r--r-- | Documentation/spinlocks.txt | 22 | ||||
-rw-r--r-- | Documentation/unaligned-memory-access.txt | 4 |
19 files changed, 441 insertions, 633 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index fc8e7c7d182..e8fb2467196 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -271,8 +271,6 @@ netlabel/ - directory with information on the NetLabel subsystem. networking/ - directory with info on various aspects of networking with Linux. -nfsroot.txt - - short guide on setting up a diskless box with NFS root filesystem. nmi_watchdog.txt - info on NMI watchdog for SMP systems. nommu-mmap.txt @@ -321,8 +319,6 @@ robust-futexes.txt - a description of what robust futexes are. rocket.txt - info on the Comtrol RocketPort multiport serial driver. -rpc-cache.txt - - introduction to the caching mechanisms in the sunrpc layer. rt-mutex-design.txt - description of the RealTime mutex implementation design. rt-mutex.txt diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 08a1ed1cb5d..1fc4e7144dc 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -328,7 +328,7 @@ now, but you can do this to mark internal company procedures or just point out some special detail about the sign-off. -13) When to use Acked-by: +13) When to use Acked-by: and Cc: The Signed-off-by: tag indicates that the signer was involved in the development of the patch, or that he/she was in the patch's delivery path. @@ -349,11 +349,59 @@ Acked-by: does not necessarily indicate acknowledgement of the entire patch. For example, if a patch affects multiple subsystems and has an Acked-by: from one subsystem maintainer then this usually indicates acknowledgement of just the part which affects that maintainer's code. Judgement should be used here. - When in doubt people should refer to the original discussion in the mailing +When in doubt people should refer to the original discussion in the mailing list archives. +If a person has had the opportunity to comment on a patch, but has not +provided such comments, you may optionally add a "Cc:" tag to the patch. +This is the only tag which might be added without an explicit action by the +person it names. This tag documents that potentially interested parties +have been included in the discussion -14) The canonical patch format + +14) Using Test-by: and Reviewed-by: + +A Tested-by: tag indicates that the patch has been successfully tested (in +some environment) by the person named. This tag informs maintainers that +some testing has been performed, provides a means to locate testers for +future patches, and ensures credit for the testers. + +Reviewed-by:, instead, indicates that the patch has been reviewed and found +acceptable according to the Reviewer's Statement: + + Reviewer's statement of oversight + + By offering my Reviewed-by: tag, I state that: + + (a) I have carried out a technical review of this patch to + evaluate its appropriateness and readiness for inclusion into + the mainline kernel. + + (b) Any problems, concerns, or questions relating to the patch + have been communicated back to the submitter. I am satisfied + with the submitter's response to my comments. + + (c) While there may be things that could be improved with this + submission, I believe that it is, at this time, (1) a + worthwhile modification to the kernel, and (2) free of known + issues which would argue against its inclusion. + + (d) While I have reviewed the patch and believe it to be sound, I + do not (unless explicitly stated elsewhere) make any + warranties or guarantees that it will achieve its stated + purpose or function properly in any given situation. + +A Reviewed-by tag is a statement of opinion that the patch is an +appropriate modification of the kernel without any remaining serious +technical issues. Any interested reviewer (who has done the work) can +offer a Reviewed-by tag for a patch. This tag serves to give credit to +reviewers and to inform maintainers of the degree of review which has been +done on the patch. Reviewed-by: tags, when supplied by reviewers known to +understand the subject area and to perform thorough reviews, will normally +increase the liklihood of your patch getting into the kernel. + + +15) The canonical patch format The canonical patch subject line is: @@ -512,7 +560,7 @@ They provide type safety, have no length limitations, no formatting limitations, and under gcc they are as cheap as macros. Macros should only be used for cases where a static inline is clearly -suboptimal [there a few, isolated cases of this in fast paths], +suboptimal [there are a few, isolated cases of this in fast paths], or where it is impossible to use a static inline function [such as string-izing]. diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index e68021c08fb..52cd611277a 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -66,6 +66,8 @@ mandatory-locking.txt - info on the Linux implementation of Sys V mandatory file locking. ncpfs.txt - info on Novell Netware(tm) filesystem using NCP protocol. +nfsroot.txt + - short guide on setting up a diskless box with NFS root filesystem. ntfs.txt - info and mount options for the NTFS filesystem (Windows NT). ocfs2.txt @@ -82,6 +84,10 @@ relay.txt - info on relay, for efficient streaming from kernel to user space. romfs.txt - description of the ROMFS filesystem. +rpc-cache.txt + - introduction to the caching mechanisms in the sunrpc layer. +seq_file.txt + - how to use the seq_file API sharedsubtree.txt - a description of shared subtrees for namespaces. smbfs.txt diff --git a/Documentation/nfsroot.txt b/Documentation/filesystems/nfsroot.txt index 31b32917234..31b32917234 100644 --- a/Documentation/nfsroot.txt +++ b/Documentation/filesystems/nfsroot.txt diff --git a/Documentation/rpc-cache.txt b/Documentation/filesystems/rpc-cache.txt index 8a382bea680..8a382bea680 100644 --- a/Documentation/rpc-cache.txt +++ b/Documentation/filesystems/rpc-cache.txt diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt new file mode 100644 index 00000000000..cc6cdb95b73 --- /dev/null +++ b/Documentation/filesystems/seq_file.txt @@ -0,0 +1,283 @@ +The seq_file interface + + Copyright 2003 Jonathan Corbet <corbet@lwn.net> + This file is originally from the LWN.net Driver Porting series at + http://lwn.net/Articles/driver-porting/ + + +There are numerous ways for a device driver (or other kernel component) to +provide information to the user or system administrator. One useful +technique is the creation of virtual files, in debugfs, /proc or elsewhere. +Virtual files can provide human-readable output that is easy to get at +without any special utility programs; they can also make life easier for +script writers. It is not surprising that the use of virtual files has +grown over the years. + +Creating those files correctly has always been a bit of a challenge, +however. It is not that hard to make a virtual file which returns a +string. But life gets trickier if the output is long - anything greater +than an application is likely to read in a single operation. Handling +multiple reads (and seeks) requires careful attention to the reader's +position within the virtual file - that position is, likely as not, in the +middle of a line of output. The kernel has traditionally had a number of +implementations that got this wrong. + +The 2.6 kernel contains a set of functions (implemented by Alexander Viro) +which are designed to make it easy for virtual file creators to get it +right. + +The seq_file interface is available via <linux/seq_file.h>. There are +three aspects to seq_file: + + * An iterator interface which lets a virtual file implementation + step through the objects it is presenting. + + * Some utility functions for formatting objects for output without + needing to worry about things like output buffers. + + * A set of canned file_operations which implement most operations on + the virtual file. + +We'll look at the seq_file interface via an extremely simple example: a +loadable module which creates a file called /proc/sequence. The file, when +read, simply produces a set of increasing integer values, one per line. The +sequence will continue until the user loses patience and finds something +better to do. The file is seekable, in that one can do something like the +following: + + dd if=/proc/sequence of=out1 count=1 + dd if=/proc/sequence skip=1 out=out2 count=1 + +Then concatenate the output files out1 and out2 and get the right +result. Yes, it is a thoroughly useless module, but the point is to show +how the mechanism works without getting lost in other details. (Those +wanting to see the full source for this module can find it at +http://lwn.net/Articles/22359/). + + +The iterator interface + +Modules implementing a virtual file with seq_file must implement a simple +iterator object that allows stepping through the data of interest. +Iterators must be able to move to a specific position - like the file they +implement - but the interpretation of that position is up to the iterator +itself. A seq_file implementation that is formatting firewall rules, for +example, could interpret position N as the Nth rule in the chain. +Positioning can thus be done in whatever way makes the most sense for the +generator of the data, which need not be aware of how a position translates +to an offset in the virtual file. The one obvious exception is that a +position of zero should indicate the beginning of the file. + +The /proc/sequence iterator just uses the count of the next number it +will output as its position. + +Four functions must be implemented to make the iterator work. The first, +called start() takes a position as an argument and returns an iterator +which will start reading at that position. For our simple sequence example, +the start() function looks like: + + static void *ct_seq_start(struct seq_file *s, loff_t *pos) + { + loff_t *spos = kmalloc(sizeof(loff_t), GFP_KERNEL); + if (! spos) + return NULL; + *spos = *pos; + return spos; + } + +The entire data structure for this iterator is a single loff_t value +holding the current position. There is no upper bound for the sequence +iterator, but that will not be the case for most other seq_file +implementations; in most cases the start() function should check for a +"past end of file" condition and return NULL if need be. + +For more complicated applications, the private field of the seq_file +structure can be used. There is also a special value whch can be returned +by the start() function called SEQ_START_TOKEN; it can be used if you wish +to instruct your show() function (described below) to print a header at the +top of the output. SEQ_START_TOKEN should only be used if the offset is +zero, however. + +The next function to implement is called, amazingly, next(); its job is to +move the iterator forward to the next position in the sequence. The +example module can simply increment the position by one; more useful +modules will do what is needed to step through some data structure. The +next() function returns a new iterator, or NULL if the sequence is +complete. Here's the example version: + + static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) + { + loff_t *spos = v; + *pos = ++*spos; + return spos; + } + +The stop() function is called when iteration is complete; its job, of +course, is to clean up. If dynamic memory is allocated for the iterator, +stop() is the place to free it. + + static void ct_seq_stop(struct seq_file *s, void *v) + { + kfree(v); + } + +Finally, the show() function should format the object currently pointed to +by the iterator for output. It should return zero, or an error code if +something goes wrong. The example module's show() function is: + + static int ct_seq_show(struct seq_file *s, void *v) + { + loff_t *spos = v; + seq_printf(s, "%lld\n", (long long)*spos); + return 0; + } + +We will look at seq_printf() in a moment. But first, the definition of the +seq_file iterator is finished by creating a seq_operations structure with +the four functions we have just defined: + + static const struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show + }; + +This structure will be needed to tie our iterator to the /proc file in +a little bit. + +It's worth noting that the interator value returned by start() and +manipulated by the other functions is considered to be completely opaque by +the seq_file code. It can thus be anything that is useful in stepping +through the data to be output. Counters can be useful, but it could also be +a direct pointer into an array or linked list. Anything goes, as long as +the programmer is aware that things can happen between calls to the +iterator function. However, the seq_file code (by design) will not sleep +between the calls to start() and stop(), so holding a lock during that time +is a reasonable thing to do. The seq_file code will also avoid taking any +other locks while the iterator is active. + + +Formatted output + +The seq_file code manages positioning within the output created by the +iterator and getting it into the user's buffer. But, for that to work, that +output must be passed to the seq_file code. Some utility functions have +been defined which make this task easy. + +Most code will simply use seq_printf(), which works pretty much like +printk(), but which requires the seq_file pointer as an argument. It is +common to ignore the return value from seq_printf(), but a function +producing complicated output may want to check that value and quit if +something non-zero is returned; an error return means that the seq_file +buffer has been filled and further output will be discarded. + +For straight character output, the following functions may be used: + + int seq_putc(struct seq_file *m, char c); + int seq_puts(struct seq_file *m, const char *s); + int seq_escape(struct seq_file *m, const char *s, const char *esc); + +The first two output a single character and a string, just like one would +expect. seq_escape() is like seq_puts(), except that any character in s +which is in the string esc will be represented in octal form in the output. + +There is also a function for printing filenames: + + int seq_path(struct seq_file *m, struct path *path, char *esc); + +Here, path indicates the file of interest, and esc is a set of characters +which should be escaped in the output. + + +Making it all work + +So far, we have a nice set of functions which can produce output within the +seq_file system, but we have not yet turned them into a file that a user +can see. Creating a file within the kernel requires, of course, the +creation of a set of file_operations which implement the operations on that +file. The seq_file interface provides a set of canned operations which do +most of the work. The virtual file author still must implement the open() +method, however, to hook everything up. The open function is often a single +line, as in the example module: + + static int ct_open(struct inode *inode, struct file *file) + { + return seq_open(file, &ct_seq_ops); + } + +Here, the call to seq_open() takes the seq_operations structure we created +before, and gets set up to iterate through the virtual file. + +On a successful open, seq_open() stores the struct seq_file pointer in +file->private_data. If you have an application where the same iterator can +be used for more than one file, you can store an arbitrary pointer in the +private field of the seq_file structure; that value can then be retrieved +by the iterator functions. + +The other operations of interest - read(), llseek(), and release() - are +all implemented by the seq_file code itself. So a virtual file's +file_operations structure will look like: + + static const struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release + }; + +There is also a seq_release_private() which passes the contents of the +seq_file private field to kfree() before releasing the structure. + +The final step is the creation of the /proc file itself. In the example +code, that is done in the initialization code in the usual way: + + static int ct_init(void) + { + struct proc_dir_entry *entry; + + entry = create_proc_entry("sequence", 0, NULL); + if (entry) + entry->proc_fops = &ct_file_ops; + return 0; + } + + module_init(ct_init); + +And that is pretty much it. + + +seq_list + +If your file will be iterating through a linked list, you may find these +routines useful: + + struct list_head *seq_list_start(struct list_head *head, + loff_t pos); + struct list_head *seq_list_start_head(struct list_head *head, + loff_t pos); + struct list_head *seq_list_next(void *v, struct list_head *head, + loff_t *ppos); + +These helpers will interpret pos as a position within the list and iterate +accordingly. Your start() and next() functions need only invoke the +seq_list_* helpers with a pointer to the appropriate list_head structure. + + +The extra-simple version + +For extremely simple virtual files, there is an even easier interface. A +module can define only the show() function, which should create all the +output that the virtual file will contain. The file's open() method then +calls: + + int single_open(struct file *file, + int (*show)(struct seq_file *m, void *p), + void *data); + +When output time comes, the show() function will be called once. The data +value given to single_open() can be found in the private field of the +seq_file structure. When using single_open(), the programmer should use +single_release() instead of seq_release() in the file_operations structure +to avoid a memory leak. diff --git a/Documentation/hrtimers/highres.txt b/Documentation/hrtimers/highres.txt index ce0e9a91e15..a73ecf5b4bd 100644 --- a/Documentation/hrtimers/highres.txt +++ b/Documentation/hrtimers/highres.txt @@ -98,7 +98,7 @@ System-level global event devices are used for the Linux periodic tick. Per-CPU event devices are used to provide local CPU functionality such as process accounting, profiling, and high resolution timers. -The management layer assignes one or more of the folliwing functions to a clock +The management layer assigns one or more of the following functions to a clock event device: - system global periodic tick (jiffies update) - cpu local update_process_times diff --git a/Documentation/i386/IO-APIC.txt b/Documentation/i386/IO-APIC.txt index f95166645d2..30b4c714fbe 100644 --- a/Documentation/i386/IO-APIC.txt +++ b/Documentation/i386/IO-APIC.txt @@ -70,7 +70,7 @@ Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD: These INTA-D PCI IRQs are always 'local to the card', their real meaning depends on which slot they are in. If you look at the daisy chaining diagram, -a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ2 of +a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ4 of the PCI chipset. Most cards issue INTA, this creates optimal distribution between the PIRQ lines. (distributing IRQ sources properly is not a necessity, PCI IRQs can be shared at will, but it's a good for performance diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 508e2a2c986..dafd001bf83 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -170,11 +170,6 @@ and is between 256 and 4096 characters. It is defined in the file acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA Format: <irq>,<irq>... - acpi_new_pts_ordering [HW,ACPI] - Enforce the ACPI 2.0 ordering of the _PTS control - method wrt putting devices into low power states - default: pre ACPI 2.0 ordering of _PTS - acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS @@ -380,6 +375,10 @@ and is between 256 and 4096 characters. It is defined in the file ccw_timeout_log [S390] See Documentation/s390/CommonIO for details. + cgroup_disable= [KNL] Disable a particular controller + Format: {name of the controller(s) to disable} + {Currently supported controllers - "memory"} + checkreqprot [SELINUX] Set initial checkreqprot flag value. Format: { "0" | "1" } See security/selinux/Kconfig help text. @@ -845,7 +844,7 @@ and is between 256 and 4096 characters. It is defined in the file arch/alpha/kernel/core_marvel.c. ip= [IP_PNP] - See Documentation/nfsroot.txt. + See Documentation/filesystems/nfsroot.txt. ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards See comment before ip2_setup() in @@ -1199,10 +1198,10 @@ and is between 256 and 4096 characters. It is defined in the file file if at all. nfsaddrs= [NFS] - See Documentation/nfsroot.txt. + See Documentation/filesystems/nfsroot.txt. nfsroot= [NFS] nfs root filesystem for disk-less boxes. - See Documentation/nfsroot.txt. + See Documentation/filesystems/nfsroot.txt. nfs.callback_tcpport= [NFS] set the TCP port on which the NFSv4 callback diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index bec5a32e409..4c1fc65a8b3 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1,7 +1,7 @@ /*P:100 This is the Launcher code, a simple program which lays out the - * "physical" memory for the new Guest by mapping the kernel image and the - * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. -:*/ + * "physical" memory for the new Guest by mapping the kernel image and + * the virtual devices, then opens /dev/lguest to tell the kernel + * about the Guest and control it. :*/ #define _LARGEFILE64_SOURCE #define _GNU_SOURCE #include <stdio.h> @@ -43,7 +43,7 @@ #include "linux/virtio_console.h" #include "linux/virtio_ring.h" #include "asm-x86/bootparam.h" -/*L:110 We can ignore the 38 include files we need for this program, but I do +/*L:110 We can ignore the 39 include files we need for this program, but I do * want to draw attention to the use of kernel-style types. * * As Linus said, "C is a Spartan language, and so should your naming be." I @@ -320,7 +320,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) err(1, "Reading program headers"); /* Try all the headers: there are usually only three. A read-only one, - * a read-write one, and a "note" section which isn't loadable. */ + * a read-write one, and a "note" section which we don't load. */ for (i = 0; i < ehdr->e_phnum; i++) { /* If this isn't a loadable segment, we ignore it */ if (phdr[i].p_type != PT_LOAD) @@ -387,7 +387,7 @@ static unsigned long load_kernel(int fd) if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) return map_elf(fd, &hdr); - /* Otherwise we assume it's a bzImage, and try to unpack it */ + /* Otherwise we assume it's a bzImage, and try to load it. */ return load_bzimage(fd); } @@ -433,12 +433,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem) return len; } -/* Once we know how much memory we have, we can construct simple linear page +/* Once we know how much memory we have we can construct simple linear page * tables which set virtual == physical which will get the Guest far enough * into the boot to create its own. * * We lay them out of the way, just below the initrd (which is why we need to - * know its size). */ + * know its size here). */ static unsigned long setup_pagetables(unsigned long mem, unsigned long initrd_size) { @@ -850,7 +850,8 @@ static void handle_console_output(int fd, struct virtqueue *vq) * * Handling output for network is also simple: we get all the output buffers * and write them (ignoring the first element) to this device's file descriptor - * (stdout). */ + * (/dev/net/tun). + */ static void handle_net_output(int fd, struct virtqueue *vq) { unsigned int head, out, in; @@ -924,7 +925,7 @@ static void enable_fd(int fd, struct virtqueue *vq) write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); } -/* Resetting a device is fairly easy. */ +/* When the Guest asks us to reset a device, it's is fairly easy. */ static void reset_device(struct device *dev) { struct virtqueue *vq; @@ -1003,8 +1004,8 @@ static void handle_input(int fd) if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) break; - /* Otherwise, call the device(s) which have readable - * file descriptors and a method of handling them. */ + /* Otherwise, call the device(s) which have readable file + * descriptors and a method of handling them. */ for (i = devices.dev; i; i = i->next) { if (i->handle_input && FD_ISSET(i->fd, &fds)) { int dev_fd; @@ -1015,8 +1016,7 @@ static void handle_input(int fd) * should no longer service it. Networking and * console do this when there's no input * buffers to deliver into. Console also uses - * it when it discovers that stdin is - * closed. */ + * it when it discovers that stdin is closed. */ FD_CLR(i->fd, &devices.infds); /* Tell waker to ignore it too, by sending a * negative fd number (-1, since 0 is a valid @@ -1033,7 +1033,8 @@ static void handle_input(int fd) * * All devices need a descriptor so the Guest knows it exists, and a "struct * device" so the Launcher can keep track of it. We have common helper - * routines to allocate and manage them. */ + * routines to allocate and manage them. + */ /* The layout of the device page is a "struct lguest_device_desc" followed by a * number of virtqueue descriptors, then two sets of feature bits, then an @@ -1078,7 +1079,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, struct virtqueue **i, *vq = malloc(sizeof(*vq)); void *p; - /* First we need some pages for this virtqueue. */ + /* First we need some memory for this virtqueue. */ pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) / getpagesize(); p = get_pages(pages); @@ -1122,7 +1123,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, } /* The first half of the feature bitmask is for us to advertise features. The - * second half if for the Guest to accept features. */ + * second half is for the Guest to accept features. */ static void add_feature(struct device *dev, unsigned bit) { u8 *features = get_feature_bits(dev); @@ -1151,7 +1152,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf) } /* This routine does all the creation and setup of a new device, including - * calling new_dev_desc() to allocate the descriptor and device memory. */ + * calling new_dev_desc() to allocate the descriptor and device memory. + * + * See what I mean about userspace being boring? */ static struct device *new_device(const char *name, u16 type, int fd, bool (*handle_input)(int, struct device *)) { @@ -1383,7 +1386,6 @@ struct vblk_info * Launcher triggers interrupt to Guest. */ int done_fd; }; -/*:*/ /*L:210 * The Disk @@ -1493,7 +1495,10 @@ static int io_thread(void *_dev) while (read(vblk->workpipe[0], &c, 1) == 1) { /* We acknowledge each request immediately to reduce latency, * rather than waiting until we've done them all. I haven't - * measured to see if it makes any difference. */ + * measured to see if it makes any difference. + * + * That would be an interesting test, wouldn't it? You could + * also try having more than one I/O thread. */ while (service_io(dev)) write(vblk->done_fd, &c, 1); } @@ -1501,7 +1506,7 @@ static int io_thread(void *_dev) } /* Now we've seen the I/O thread, we return to the Launcher to see what happens - * when the thread tells us it's completed some I/O. */ + * when that thread tells us it's completed some I/O. */ static bool handle_io_finish(int fd, struct device *dev) { char c; @@ -1573,11 +1578,12 @@ static void setup_block_file(const char *filename) * more work. */ pipe(vblk->workpipe); - /* Create stack for thread and run it */ + /* Create stack for thread and run it. Since stack grows upwards, we + * point the stack pointer to the end of this region. */ stack = malloc(32768); /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from * becoming a zombie. */ - if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) + if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) err(1, "Creating clone"); /* We don't need to keep the I/O thread's end of the pipes open. */ @@ -1587,14 +1593,14 @@ static void setup_block_file(const char *filename) verbose("device %u: virtblock %llu sectors\n", devices.device_num, le64_to_cpu(conf.capacity)); } -/* That's the end of device setup. :*/ +/* That's the end of device setup. */ -/* Reboot */ +/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ static void __attribute__((noreturn)) restart_guest(void) { unsigned int i; - /* Closing pipes causes the waker thread and io_threads to die, and + /* Closing pipes causes the Waker thread and io_threads to die, and * closing /dev/lguest cleans up the Guest. Since we don't track all * open fds, we simply close everything beyond stderr. */ for (i = 3; i < FD_SETSIZE; i++) @@ -1603,7 +1609,7 @@ static void __attribute__((noreturn)) restart_guest(void) err(1, "Could not exec %s", main_args[0]); } -/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves +/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves * its input and output, and finally, lays it to rest. */ static void __attribute__((noreturn)) run_guest(int lguest_fd) { @@ -1644,7 +1650,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) err(1, "Resetting break"); } } -/* +/*L:240 * This is the end of the Launcher. The good news: we are over halfway * through! The bad news: the most fiendish part of the code still lies ahead * of us. @@ -1691,8 +1697,8 @@ int main(int argc, char *argv[]) * device receive input from a file descriptor, we keep an fdset * (infds) and the maximum fd number (max_infd) with the head of the * list. We also keep a pointer to the last device. Finally, we keep - * the next interrupt number to hand out (1: remember that 0 is used by - * the timer). */ + * the next interrupt number to use for devices (1: remember that 0 is + * used by the timer). */ FD_ZERO(&devices.infds); devices.max_infd = -1; devices.lastdev = NULL; @@ -1793,8 +1799,8 @@ int main(int argc, char *argv[]) lguest_fd = tell_kernel(pgdir, start); /* We fork off a child process, which wakes the Launcher whenever one - * of the input file descriptors needs attention. Otherwise we would - * run the Guest until it tries to output something. */ + * of the input file descriptors needs attention. We call this the + * Waker, and we'll cover it in a moment. */ waker_fd = setup_waker(lguest_fd); /* Finally, run the Guest. This doesn't return. */ diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt index 722d4e7fbeb..29510dc5151 100644 --- a/Documentation/lguest/lguest.txt +++ b/Documentation/lguest/lguest.txt @@ -1,6 +1,7 @@ -Rusty's Remarkably Unreliable Guide to Lguest - - or, A Young Coder's Illustrated Hypervisor -http://lguest.ozlabs.org + __ + (___()'`; Rusty's Remarkably Unreliable Guide to Lguest + /, /` - or, A Young Coder's Illustrated Hypervisor + \\"--\\ http://lguest.ozlabs.org Lguest is designed to be a minimal hypervisor for the Linux kernel, for Linux developers and users to experiment with virtualization with the @@ -41,12 +42,16 @@ Running Lguest: |