diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-08-11 14:19:09 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-08-11 14:19:09 +0200 |
commit | 89034bc2c7b839702c00a704e79d112737f98be0 (patch) | |
tree | e65b1f3d4c751baa840efc81bc4734f089379eb3 /Documentation | |
parent | fb82ad719831db58e9baa4c67015aae3fe27e7e3 (diff) | |
parent | 85dfd81dc57e8183a277ddd7a56aa65c96f3f487 (diff) |
Merge branch 'linus' into tracing/core
Conflicts:
kernel/trace/trace_events_filter.c
We use the tracing/core version.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/ABI/testing/sysfs-block | 37 | ||||
-rw-r--r-- | Documentation/DocBook/kernel-hacking.tmpl | 4 | ||||
-rw-r--r-- | Documentation/RCU/rculist_nulls.txt | 7 | ||||
-rw-r--r-- | Documentation/arm/memory.txt | 2 | ||||
-rw-r--r-- | Documentation/connector/cn_test.c | 4 | ||||
-rw-r--r-- | Documentation/connector/ucon.c | 2 | ||||
-rw-r--r-- | Documentation/filesystems/sysfs.txt | 3 | ||||
-rw-r--r-- | Documentation/ioctl/ioctl-number.txt | 1 | ||||
-rw-r--r-- | Documentation/laptops/thinkpad-acpi.txt | 127 | ||||
-rw-r--r-- | Documentation/lguest/lguest.c | 721 | ||||
-rw-r--r-- | Documentation/lockdep-design.txt | 6 | ||||
-rw-r--r-- | Documentation/networking/6pack.txt | 2 | ||||
-rw-r--r-- | Documentation/sound/alsa/Procfile.txt | 5 | ||||
-rw-r--r-- | Documentation/sysrq.txt | 7 | ||||
-rw-r--r-- | Documentation/video4linux/CARDLIST.em28xx | 2 | ||||
-rw-r--r-- | Documentation/video4linux/gspca.txt | 32 |
16 files changed, 568 insertions, 394 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index cbbd3e06994..5f3bedaf8e3 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -94,28 +94,37 @@ What: /sys/block/<disk>/queue/physical_block_size Date: May 2009 Contact: Martin K. Petersen <martin.petersen@oracle.com> Description: - This is the smallest unit the storage device can write - without resorting to read-modify-write operation. It is - usually the same as the logical block size but may be - bigger. One example is SATA drives with 4KB sectors - that expose a 512-byte logical block size to the - operating system. + This is the smallest unit a physical storage device can + write atomically. It is usually the same as the logical + block size but may be bigger. One example is SATA + drives with 4KB sectors that expose a 512-byte logical + block size to the operating system. For stacked block + devices the physical_block_size variable contains the + maximum physical_block_size of the component devices. What: /sys/block/<disk>/queue/minimum_io_size Date: April 2009 Contact: Martin K. Petersen <martin.petersen@oracle.com> Description: - Storage devices may report a preferred minimum I/O size, - which is the smallest request the device can perform - without incurring a read-modify-write penalty. For disk - drives this is often the physical block size. For RAID - arrays it is often the stripe chunk size. + Storage devices may report a granularity or preferred + minimum I/O size which is the smallest request the + device can perform without incurring a performance + penalty. For disk drives this is often the physical + block size. For RAID arrays it is often the stripe + chunk size. A properly aligned multiple of + minimum_io_size is the preferred request size for + workloads where a high number of I/O operations is + desired. What: /sys/block/<disk>/queue/optimal_io_size Date: April 2009 Contact: Martin K. Petersen <martin.petersen@oracle.com> Description: Storage devices may report an optimal I/O size, which is - the device's preferred unit of receiving I/O. This is - rarely reported for disk drives. For RAID devices it is - usually the stripe width or the internal block size. + the device's preferred unit for sustained I/O. This is + rarely reported for disk drives. For RAID arrays it is + usually the stripe width or the internal track size. A + properly aligned multiple of optimal_io_size is the + preferred request size for workloads where sustained + throughput is desired. If no optimal I/O size is + reported this file contains 0. diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index a50d6cd5857..992e67e6be7 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl @@ -449,8 +449,8 @@ printk(KERN_INFO "i = %u\n", i); </para> <programlisting> -__u32 ipaddress; -printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); +__be32 ipaddress; +printk(KERN_INFO "my ip: %pI4\n", &ipaddress); </programlisting> <para> diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt index 93cb28d05dc..18f9651ff23 100644 --- a/Documentation/RCU/rculist_nulls.txt +++ b/Documentation/RCU/rculist_nulls.txt @@ -83,11 +83,12 @@ not detect it missed following items in original chain. obj = kmem_cache_alloc(...); lock_chain(); // typically a spin_lock() obj->key = key; -atomic_inc(&obj->refcnt); /* * we need to make sure obj->key is updated before obj->next + * or obj->refcnt */ smp_wmb(); +atomic_set(&obj->refcnt, 1); hlist_add_head_rcu(&obj->obj_node, list); unlock_chain(); // typically a spin_unlock() @@ -159,6 +160,10 @@ out: obj = kmem_cache_alloc(cachep); lock_chain(); // typically a spin_lock() obj->key = key; +/* + * changes to obj->key must be visible before refcnt one + */ +smp_wmb(); atomic_set(&obj->refcnt, 1); /* * insert obj in RCU way (readers might be traversing chain) diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt index 43cb1004d35..9d58c7c5edd 100644 --- a/Documentation/arm/memory.txt +++ b/Documentation/arm/memory.txt @@ -21,6 +21,8 @@ ffff8000 ffffffff copy_user_page / clear_user_page use. For SA11xx and Xscale, this is used to setup a minicache mapping. +ffff4000 ffffffff cache aliasing on ARMv6 and later CPUs. + ffff1000 ffff7fff Reserved. Platforms must not use this address range. diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c index f688eba8770..6a5be5d5c8e 100644 --- a/Documentation/connector/cn_test.c +++ b/Documentation/connector/cn_test.c @@ -1,7 +1,7 @@ /* * cn_test.c * - * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> + * 2004+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> * All rights reserved. * * This program is free software; you can redistribute it and/or modify @@ -194,5 +194,5 @@ module_init(cn_test_init); module_exit(cn_test_fini); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>"); +MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); MODULE_DESCRIPTION("Connector's test module"); diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c index d738cde2a8d..c5092ad0ce4 100644 --- a/Documentation/connector/ucon.c +++ b/Documentation/connector/ucon.c @@ -1,7 +1,7 @@ /* * ucon.c * - * Copyright (c) 2004+ Evgeniy Polyakov <johnpol@2ka.mipt.ru> + * Copyright (c) 2004+ Evgeniy Polyakov <zbr@ioremap.net> * * * This program is free software; you can redistribute it and/or modify diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 7e81e37c0b1..b245d524d56 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -23,7 +23,8 @@ interface. Using sysfs ~~~~~~~~~~~ -sysfs is always compiled in. You can access it by doing: +sysfs is always compiled in if CONFIG_SYSFS is defined. You can access +it by doing: mount -t sysfs sysfs /sys diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 7bb0d934b6d..dbea4f95fc8 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -139,6 +139,7 @@ Code Seq# Include File Comments 'm' all linux/synclink.h conflict! 'm' 00-1F net/irda/irmod.h conflict! 'n' 00-7F linux/ncp_fs.h +'n' 80-8F linux/nilfs2_fs.h NILFS2 'n' E0-FF video/matrox.h matroxfb 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps) diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index f2296ecedb8..e2ddcdeb61b 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -36,8 +36,6 @@ detailed description): - Bluetooth enable and disable - video output switching, expansion control - ThinkLight on and off - - limited docking and undocking - - UltraBay eject - CMOS/UCMS control - LED control - ACPI sounds @@ -729,131 +727,6 @@ cannot be read or if it is unknown, thinkpad-acpi will report it as "off". It is impossible to know if the status returned through sysfs is valid. -Docking / undocking -- /proc/acpi/ibm/dock ------------------------------------------- - -Docking and undocking (e.g. with the X4 UltraBase) requires some -actions to be taken by the operating system to safely make or break -the electrical connections with the dock. - -The docking feature of this driver generates the following ACPI events: - - ibm/dock GDCK 00000003 00000001 -- eject request - ibm/dock GDCK 00000003 00000002 -- undocked - ibm/dock GDCK 00000000 00000003 -- docked - -NOTE: These events will only be generated if the laptop was docked -when originally booted. This is due to the current lack of support for -hot plugging of devices in the Linux ACPI framework. If the laptop was -booted while not in the dock, the following message is shown in the -logs: - - Mar 17 01:42:34 aero kernel: thinkpad_acpi: dock device not present - -In this case, no dock-related events are generated but the dock and -undock commands described below still work. They can be executed -manually or triggered by Fn key combinations (see the example acpid -configuration files included in the driver tarball package available -on the web site). - -When the eject request button on the dock is pressed, the first event -above is generated. The handler for this event should issue the -following command: - - echo undock > /proc/acpi/ibm/dock - -After the LED on the dock goes off, it is safe to eject the laptop. -Note: if you pressed this key by mistake, go ahead and eject the -laptop, then dock it back in. Otherwise, the dock may not function as -expected. - -When the laptop is docked, the third event above is generated. The -handler for this event should issue the following command to fully -enable the dock: - - echo dock > /proc/acpi/ibm/dock - -The contents of the /proc/acpi/ibm/dock file shows the current status -of the dock, as provided by the ACPI framework. - -The docking support in this driver does not take care of enabling or -disabling any other devices you may have attached to the dock. For -example, a CD drive plugged into the UltraBase needs to be disabled or -enabled separately. See the provided example acpid configuration files -for how this can be accomplished. - -There is no support yet for PCI devices that may be attached to a -docking station, e.g. in the ThinkPad Dock II. The driver currently -does not recognize, enable or disable such devices. This means that -the only docking stations currently supported are the X-series -UltraBase docks and "dumb" port replicators like the Mini Dock (the -latter don't need any ACPI support, actually). - - -UltraBay eject -- /proc/acpi/ibm/bay ------------------------------------- - -Inserting or ejecting an UltraBay device requires some actions to be -taken by the operating system to safely make or break the electrical -connections with the device. - -This feature generates the following ACPI events: - - ibm/bay MSTR 00000003 00000000 -- eject request - ibm/bay MSTR 00000001 00000000 -- eject lever inserted - -NOTE: These events will only be generated if the UltraBay was present -when the laptop was originally booted (on the X series, the UltraBay -is in the dock, so it may not be present if the laptop was undocked). -This is due to the current lack of support for hot plugging of devices -in the Linux ACPI framework. If the laptop was booted without the -UltraBay, the following message is shown in the logs: - - Mar 17 01:42:34 aero kernel: thinkpad_acpi: bay device not present - -In this case, no bay-related events are generated but the eject -command described below still works. It can be executed manually or -triggered by a hot key combination. - -Sliding the eject lever generates the first event shown above. The -handler for this event should take whatever actions are necessary to -shut down the device in the UltraBay (e.g. call idectl), then issue -the following command: - - echo eject > /proc/acpi/ibm/bay - -After the LED on the UltraBay goes off, it is safe to pull out the -device. - -When the eject lever is inserted, the second event above is -generated. The handler for this event should take whatever actions are -necessary to enable the UltraBay device (e.g. call idectl). - -The contents of the /proc/acpi/ibm/bay file shows the current status -of the UltraBay, as provided by the ACPI framework. - -EXPERIMENTAL warm eject support on the 600e/x, A22p and A3x (To use -this feature, you need to supply the experimental=1 parameter when -loading the module): - -These models do not have a button near the UltraBay device to request -a hot eject but rather require the laptop to be put to sleep -(suspend-to-ram) before the bay device is ejected or inserted). -The sequence of steps to eject the device is as follows: - - echo eject > /proc/acpi/ibm/bay - put the ThinkPad to sleep - remove the drive - resume from sleep - cat /proc/acpi/ibm/bay should show that the drive was removed - -On the A3x, both the UltraBay 2000 and UltraBay Plus devices are -supported. Use "eject2" instead of "eject" for the second bay. - -Note: the UltraBay eject support on the 600e/x, A22p and A3x is -EXPERIMENTAL and may not work as expected. USE WITH CAUTION! - - CMOS/UCMS control ----------------- diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 9ebcd6ef361..950cde6d6e5 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1,7 +1,9 @@ -/*P:100 This is the Launcher code, a simple program which lays out the - * "physical" memory for the new Guest by mapping the kernel image and - * the virtual devices, then opens /dev/lguest to tell the kernel - * about the Guest and control it. :*/ +/*P:100 + * This is the Launcher code, a simple program which lays out the "physical" + * memory for the new Guest by mapping the kernel image and the virtual + * devices, then opens /dev/lguest to tell the kernel about the Guest and + * control it. +:*/ #define _LARGEFILE64_SOURCE #define _GNU_SOURCE #include <stdio.h> @@ -46,13 +48,15 @@ #include "linux/virtio_rng.h" #include "linux/virtio_ring.h" #include "asm/bootparam.h" -/*L:110 We can ignore the 39 include files we need for this program, but I do - * want to draw attention to the use of kernel-style types. +/*L:110 + * We can ignore the 42 include files we need for this program, but I do want + * to draw attention to the use of kernel-style types. * * As Linus said, "C is a Spartan language, and so should your naming be." I * like these abbreviations, so we define them here. Note that u64 is always * unsigned long long, which works on all Linux systems: this means that we can - * use %llu in printf for any u64. */ + * use %llu in printf for any u64. + */ typedef unsigned long long u64; typedef uint32_t u32; typedef uint16_t u16; @@ -69,8 +73,10 @@ typedef uint8_t u8; /* This will occupy 3 pages: it must be a power of 2. */ #define VIRTQUEUE_NUM 256 -/*L:120 verbose is both a global flag and a macro. The C preprocessor allows - * this, and although I wouldn't recommend it, it works quite nicely here. */ +/*L:120 + * verbose is both a global flag and a macro. The C preprocessor allows + * this, and although I wouldn't recommend it, it works quite nicely here. + */ static bool verbose; #define verbose(args...) \ do { if (verbose) printf(args); } while(0) @@ -87,8 +93,7 @@ static int lguest_fd; static unsigned int __thread cpu_id; /* This is our list of devices. */ -struct device_list -{ +struct device_list { /* Counter to assign interrupt numbers. */ unsigned int next_irq; @@ -100,8 +105,7 @@ struct device_list /* A single linked list of devices. */ struct device *dev; - /* And a pointer to the last device for easy append and also for - * configuration appending. */ + /* And a pointer to the last device for easy append. */ struct device *lastdev; }; @@ -109,8 +113,7 @@ struct device_list static struct device_list devices; /* The device structure describes a single device. */ -struct device -{ +struct device { /* The linked-list pointer. */ struct device *next; @@ -135,8 +138,7 @@ struct device }; /* The virtqueue structure describes a queue attached to a device. */ -struct virtqueue -{ +struct virtqueue { struct virtqueue *next; /* Which device owns me. */ @@ -168,20 +170,24 @@ static char **main_args; /* The original tty settings to restore on exit. */ static struct termios orig_term; -/* We have to be careful with barriers: our devices are all run in separate +/* + * We have to be careful with barriers: our devices are all run in separate * threads and so we need to make sure that changes visible to the Guest happen - * in precise order. */ + * in precise order. + */ #define wmb() __asm__ __volatile__("" : : : "memory") #define mb() __asm__ __volatile__("" : : : "memory") -/* Convert an iovec element to the given type. +/* + * Convert an iovec element to the given type. * * This is a fairly ugly trick: we need to know the size of the type and * alignment requirement to check the pointer is kosher. It's also nice to * have the name of the type in case we report failure. * * Typing those three things all the time is cumbersome and error prone, so we - * have a macro which sets them all up and passes to the real function. */ + * have a macro which sets them all up and passes to the real function. + */ #define convert(iov, type) \ ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) @@ -198,8 +204,10 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, /* Wrapper for the last available index. Makes it easier to change. */ #define lg_last_avail(vq) ((vq)->last_avail_idx) -/* The virtio configuration space is defined to be little-endian. x86 is - * little-endian too, but it's nice to be explicit so we have these helpers. */ +/* + * The virtio configuration space is defined to be little-endian. x86 is + * little-endian too, but it's nice to be explicit so we have these helpers. + */ #define cpu_to_le16(v16) (v16) #define cpu_to_le32(v32) (v32) #define cpu_to_le64(v64) (v64) @@ -241,11 +249,12 @@ static u8 *get_feature_bits(struct device *dev) + dev->num_vq * sizeof(struct lguest_vqconfig); } -/*L:100 The Launcher code itself takes us out into userspace, that scary place - * where pointers run wild and free! Unfortunately, like most userspace - * programs, it's quite boring (which is why everyone likes to hack on the - * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it - * will get you through this section. Or, maybe not. +/*L:100 + * The Launcher code itself takes us out into userspace, that scary place where + * pointers run wild and free! Unfortunately, like most userspace programs, + * it's quite boring (which is why everyone likes to hack on the kernel!). + * Perhaps if you make up an Lguest Drinking Game at this point, it will get + * you through this section. Or, maybe not. * * The Launcher sets up a big chunk of memory to be the Guest's "physical" * memory and stores it in "guest_base". In other words, Guest physical == @@ -253,7 +262,8 @@ static u8 *get_feature_bits(struct device *dev) * * This can be tough to get your head around, but usually it just means that we * use these trivial conversion functions when the Guest gives us it's - * "physical" addresses: */ + * "physical" addresses: + */ static void *from_guest_phys(unsigned long addr) { return guest_base + addr; @@ -268,7 +278,8 @@ static unsigned long to_guest_phys(const void *addr) * Loading the Kernel. * * We start with couple of simple helper routines. open_or_die() avoids - * error-checking code cluttering the callers: */ + * error-checking code cluttering the callers: + */ static int open_or_die(const char *name, int flags) { int fd = open(name, flags); @@ -283,12 +294,19 @@ static void *map_zeroed_pages(unsigned int num) int fd = open_or_die("/dev/zero", O_RDONLY); void *addr; - /* We use a private mapping (ie. if we write to the page, it will be - * copied). */ + /* + * We use a private mapping (ie. if we write to the page, it will be + * copied). + */ addr = mmap(NULL, getpagesize() * num, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) err(1, "Mmaping %u pages of /dev/zero", num); + + /* + * One neat mmap feature is that you can close the fd, and it + * stays mapped. + */ close(fd); return addr; @@ -305,20 +323,24 @@ static void *get_pages(unsigned int num) return addr; } -/* This routine is used to load the kernel or initrd. It tries mmap, but if +/* + * This routine is used to load the kernel or initrd. It tries mmap, but if * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), - * it falls back to reading the memory in. */ + * it falls back to reading the memory in. + */ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) { ssize_t r; - /* We map writable even though for some segments are marked read-only. + /* + * We map writable even though for some segments are marked read-only. * The kernel really wants to be writable: it patches its own * instructions. * * MAP_PRIVATE means that the page won't be copied until a write is * done to it. This allows us to share untouched memory between - * Guests. */ + * Guests. + */ if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) return; @@ -329,7 +351,8 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); } -/* This routine takes an open vmlinux image, which is in ELF, and maps it into +/* + * This routine takes an open vmlinux image, which is in ELF, and maps it into * the Guest memory. ELF = Embedded Linking Format, which is the format used * by all modern binaries on Linux including the kernel. * @@ -337,23 +360,28 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) * address. We use the physical address; the Guest will map itself to the * virtual address. * - * We return the starting address. */ + * We return the starting address. + */ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) { Elf32_Phdr phdr[ehdr->e_phnum]; unsigned int i; - /* Sanity checks on the main ELF header: an x86 executable with a - * reasonable number of correctly-sized program headers. */ + /* + * Sanity checks on the main ELF header: an x86 executable with a + * reasonable number of correctly-sized program headers. + */ if (ehdr->e_type != ET_EXEC || ehdr->e_machine != EM_386 || ehdr->e_phentsize != sizeof(Elf32_Phdr) || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) errx(1, "Malformed elf header"); - /* An ELF executable contains an ELF header and a number of "program" + /* + * An ELF executable contains an ELF header and a number of "program" * headers which indicate which parts ("segments") of the program to - * load where. */ + * load where. + */ /* We read in all the program headers at once: */ if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) @@ -361,8 +389,10 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) err(1, "Reading program headers"); - /* Try all the headers: there are usually only three. A read-only one, - * a read-write one, and a "note" section which we don't load. */ + /* + * Try all the headers: there are usually only three. A read-only one, + * a read-write one, and a "note" section which we don't load. + */ for (i = 0; i < ehdr->e_phnum; i++) { /* If this isn't a loadable segment, we ignore it */ if (phdr[i].p_type != PT_LOAD) @@ -380,13 +410,15 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) return ehdr->e_entry; } -/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're - * supposed to jump into it and it will unpack itself. We used to have to - * perform some hairy magic because the unpacking code scared me. +/*L:150 + * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed + * to jump into it and it will unpack itself. We used to have to perform some + * hairy magic because the unpacking code scared me. * * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote * a small patch to jump over the tricky bits in the Guest, so now we just read - * the funky header so we know where in the file to load, and away we go! */ + * the funky header so we know where in the file to load, and away we go! + */ static unsigned long load_bzimage(int fd) { struct boot_params boot; @@ -394,8 +426,10 @@ static unsigned long load_bzimage(int fd) /* Modern bzImages get loaded at 1M. */ void *p = from_guest_phys(0x100000); - /* Go back to the start of the file and read the header. It should be - * a Linux boot header (see Documentation/x86/i386/boot.txt) */ + /* + * Go back to the start of the file and read the header. It should be + * a Linux boot header (see Documentation/x86/i386/boot.txt) + */ lseek(fd, 0, SEEK_SET); read(fd, &boot, sizeof(boot)); @@ -414,9 +448,11 @@ static unsigned long load_bzimage(int fd) return boot.hdr.code32_start; } -/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels +/*L:140 + * Loading the kernel is easy when it's a "vmlinux", but most kernels * come wrapped up in the self-decompressing "bzImage" format. With a little - * work, we can load those, too. */ + * work, we can load those, too. + */ static unsigned long load_kernel(int fd) { Elf32_Ehdr hdr; @@ -433,24 +469,28 @@ static unsigned long load_kernel(int fd) return load_bzimage(fd); } -/* This is a trivial little helper to align pages. Andi Kleen hated it because +/* + * This is a trivial little helper to align pages. Andi Kleen hated it because * it calls getpagesize() twice: "it's dumb code." * * Kernel guys get really het up about optimization, even when it's not - * necessary. I leave this code as a reaction against that. */ + * necessary. I leave this code as a reaction against that. + */ static inline unsigned long page_align(unsigned long addr) { /* Add upwards and truncate downwards. */ return ((addr + getpagesize()-1) & ~(getpagesize()-1)); } -/*L:180 An "initial ram disk" is a disk image loaded into memory along with - * the kernel which the kernel can use to boot from without needing any - * drivers. Most distributions now use this as standard: the initrd contains - * the code to load the appropriate driver modules for the current machine. +/*L:180 + * An "initial ram disk" is a disk image loaded into memory along with the + * kernel which the kernel can use to boot from without needing any drivers. + * Most distributions now use this as standard: the initrd contains the code to + * load the appropriate driver modules for the current machine. * * Importantly, James Morris works for RedHat, and Fedora uses initrds for its - * kernels. He sent me this (and tells me when I break it). */ + * kernels. He sent me this (and tells me when I break it). + */ static unsigned long load_initrd(const char *name, unsigned long mem) { int ifd; @@ -462,12 +502,16 @@ static unsigned long load_initrd(const char *name, unsigned long mem) if (fstat(ifd, &st) < 0) err(1, "fstat() on initrd '%s'", name); - /* We map the initrd at the top of memory, but mmap wants it to be - * page-aligned, so we round the size up for that. */ + /* + * We map the initrd at the top of memory, but mmap wants it to be + * page-aligned, so we round the size up for that. + */ len = page_align(st.st_size); map_at(ifd, from_guest_phys(mem - len), 0, st.st_size); - /* Once a file is mapped, you can close the file descriptor. It's a - * little odd, but quite useful. */ + /* + * Once a file is mapped, you can close the file descriptor. It's a + * little odd, but quite useful. + */ close(ifd); verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); @@ -476,8 +520,10 @@ static unsigned long load_initrd(const char *name, unsigned long mem) } /*:*/ -/* Simple routine to roll all the commandline arguments together with spaces - * between them. */ +/* + * Simple routine to roll all the commandline arguments together with spaces + * between them. + */ static void concat(char *dst, char *args[]) { unsigned int i, len = 0; @@ -494,10 +540,12 @@ static void concat(char *dst, char *args[]) dst[len] = '\0'; } -/*L:185 This is where we actually tell the kernel to initialize the Guest. We +/*L:185 + * This is where we actually tell the kernel to initialize the Guest. We * saw the arguments it expects when we looked at initialize() in lguest_user.c: * the base of Guest "physical" memory, the top physical page to allow and the - * entry point for the Guest. */ + * entry point for the Guest. + */ static void tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, @@ -511,7 +559,7 @@ static void tell_kernel(unsigned long start) } /*:*/ -/* +/*L:200 * Device Handling. * * When the Guest gives us a buffer, it sends an array of addresses and sizes. @@ -522,20 +570,26 @@ static void tell_kernel(unsigned long start) static void *_check_pointer(unsigned long addr, unsigned int size, unsigned int line) { - /* We have to separately check addr and addr+size, because size could - * be huge and addr + size might wrap around. */ + /* + * We have to separately check addr and addr+size, because size could + * be huge and addr + size might wrap around. + */ if (addr >= guest_limit || addr + size >= guest_limit) errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); - /* We return a pointer for the caller's convenience, now we know it's - * safe to use. */ + /* + * We return a pointer for the caller's convenience, now we know it's + * safe to use. + */ return from_guest_phys(addr); } /* A macro which transparently hands the line number to the real function. */ #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) -/* Each buffer in the virtqueues is actually a chain of descriptors. This +/* + * Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, or vq->vring.num if we're - * at the end. */ + * at the end. + */ static unsigned next_desc(struct vring_desc *desc, unsigned int i, unsigned int max) { @@ -556,7 +610,10 @@ static unsigned next_desc(struct vring_desc *desc, return next; } -/* This actually sends the interrupt for this virtqueue */ +/* + * This actually sends the interrupt for this virtqueue, if we've used a + * buffer. + */ static void trigger_irq(struct virtqueue *vq) { unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; @@ -576,12 +633,14 @@ static void trigger_irq(struct virtqueue *vq) err(1, "Triggering irq %i", vq->config.irq); } -/* This looks in the virtqueue and for the first available buffer, and converts +/* + * This looks in the virtqueue for the first available buffer, and converts * it to an iovec for convenient access. Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * - * This function returns the descriptor number found. */ + * This function waits if necessary, and returns the descriptor number found. + */ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct iovec iov[], unsigned int *out_num, unsigned int *in_num) @@ -590,17 +649,23 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct vring_desc *desc; u16 last_avail = lg_last_avail(vq); + /* There's nothing available? */ while (last_avail == vq->vring.avail->idx) { u64 event; - /* OK, tell Guest about progress up to now. */ + /* + * Since we're about to sleep, now is a good time to tell the + * Guest about what we've used up to now. + */ trigger_irq(vq); /* OK, now we need to know about added descriptors. */ vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; - /* They could have slipped one in as we were doing that: make - * sure it's written, then check again. */ + /* + * They could have slipped one in as we were doing that: make + * sure it's written, then c |