diff options
Diffstat (limited to 'Documentation')
22 files changed, 602 insertions, 858 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index b886f52a9aa..e5da4f2b7c2 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -240,17 +240,23 @@ X!Ilib/string.c <sect1><title>Driver Support</title> !Enet/core/dev.c !Enet/ethernet/eth.c +!Enet/sched/sch_generic.c !Iinclude/linux/etherdevice.h +!Iinclude/linux/netdevice.h + </sect1> + <sect1><title>PHY Support</title> !Edrivers/net/phy/phy.c !Idrivers/net/phy/phy.c !Edrivers/net/phy/phy_device.c !Idrivers/net/phy/phy_device.c !Edrivers/net/phy/mdio_bus.c !Idrivers/net/phy/mdio_bus.c + </sect1> <!-- FIXME: Removed for now since no structured comments in source + <sect1><title>Wireless</title> X!Enet/core/wireless.c ---> </sect1> +--> <sect1><title>Synchronous PPP</title> !Edrivers/net/wan/syncppp.c </sect1> diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 8af392fc6ef..dc3f49e3e53 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -477,9 +477,9 @@ With this multipage bio design: the same bi_io_vec array, but with the index and size accordingly modified) - A linked list of bios is used as before for unrelated merges (*) - this avoids reallocs and makes independent completions easier to handle. -- Code that traverses the req list needs to make a distinction between - segments of a request (bio_for_each_segment) and the distinct completion - units/bios (rq_for_each_bio). +- Code that traverses the req list can find all the segments of a bio + by using rq_for_each_segment. This handles the fact that a request + has multiple bios, each of which can have multiple segments. - Drivers which can't process a large bio in one shot can use the bi_idx field to keep track of the next bio_vec entry to process. (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE) @@ -664,14 +664,14 @@ in lvm or md. 3.2.1 Traversing segments and completion units in a request -The macros bio_for_each_segment() and rq_for_each_bio() should be used for -traversing the bios in the request list (drivers should avoid directly -trying to do it themselves). Using these helpers should also make it easier -to cope with block changes in the future. +The macro rq_for_each_segment() should be used for traversing the bios +in the request list (drivers should avoid directly trying to do it +themselves). Using these helpers should also make it easier to cope +with block changes in the future. - rq_for_each_bio(bio, rq) - bio_for_each_segment(bio_vec, bio, i) - /* bio_vec is now current segment */ + struct req_iterator iter; + rq_for_each_segment(bio_vec, rq, iter) + /* bio_vec is now current segment */ I/O completion callbacks are per-bio rather than per-segment, so drivers that traverse bio chains on completion need to keep that in mind. Drivers diff --git a/Documentation/block/ioprio.txt b/Documentation/block/ioprio.txt index 1b930ef5a07..35e516b0b8a 100644 --- a/Documentation/block/ioprio.txt +++ b/Documentation/block/ioprio.txt @@ -86,8 +86,15 @@ extern int sys_ioprio_get(int, int); #error "Unsupported arch" #endif -_syscall3(int, ioprio_set, int, which, int, who, int, ioprio); -_syscall2(int, ioprio_get, int, which, int, who); +static inline int ioprio_set(int which, int who, int ioprio) +{ + return syscall(__NR_ioprio_set, which, who, ioprio); +} + +static inline int ioprio_get(int which, int who) +{ + return syscall(__NR_ioprio_get, which, who); +} enum { IOPRIO_CLASS_NONE, diff --git a/Documentation/dvb/faq.txt b/Documentation/dvb/faq.txt index dbcedf5833e..2511a335abd 100644 --- a/Documentation/dvb/faq.txt +++ b/Documentation/dvb/faq.txt @@ -150,7 +150,7 @@ Some very frequently asked questions about linuxtv-dvb - saa7146_vv: SAA7146 video and vbi functions. These are only needed for full-featured cards. - - video-buf: capture helper module for the saa7146_vv driver. This + - videobuf-dma-sg: capture helper module for the saa7146_vv driver. This one is responsible to handle capture buffers. - dvb-ttpci: The main driver for AV7110 based, full-featured diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 00928d2ecfb..63df2262d41 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -306,3 +306,24 @@ Why: In kernel tree version of driver is unmaintained. Sk98lin driver Who: Stephen Hemminger <shemminger@linux-foundation.org> --------------------------- + +What: i386/x86_64 bzImage symlinks +When: April 2008 + +Why: The i386/x86_64 merge provides a symlink to the old bzImage + location so not yet updated user space tools, e.g. package + scripts, do not break. +Who: Thomas Gleixner <tglx@linutronix.de> + +--------------------------- + +What: shaper network driver +When: January 2008 +Files: drivers/net/shaper.c, include/linux/if_shaper.h +Why: This driver has been marked obsolete for many years. + It was only designed to work on lower speed links and has design + flaws that lead to machine crashes. The qdisc infrastructure in + 2.4 or later kernels, provides richer features and is more robust. +Who: Stephen Hemminger <shemminger@linux-foundation.org> + +--------------------------- diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt index 8ec54b974b6..744687dd195 100644 --- a/Documentation/infiniband/user_mad.txt +++ b/Documentation/infiniband/user_mad.txt @@ -99,6 +99,20 @@ Transaction IDs request/response pairs. The upper 32 bits are reserved for use by the kernel and will be overwritten before a MAD is sent. +P_Key Index Handling + + The old ib_umad interface did not allow setting the P_Key index for + MADs that are sent and did not provide a way for obtaining the P_Key + index of received MADs. A new layout for struct ib_user_mad_hdr + with a pkey_index member has been defined; however, to preserve + binary compatibility with older applications, this new layout will + not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called + before a file descriptor is used for anything else. + + In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented + to 6, the new layout of struct ib_user_mad_hdr will be used by + default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed. + Setting IsSM Capability Bit To set the IsSM capability bit for a port, simply open the diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4d175c75124..a57c1f216b2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -35,6 +35,7 @@ parameter is applicable: APIC APIC support is enabled. APM Advanced Power Management support is enabled. AX25 Appropriate AX.25 support is enabled. + BLACKFIN Blackfin architecture is enabled. DRM Direct Rendering Management support is enabled. EDD BIOS Enhanced Disk Drive Services (EDD) is enabled EFI EFI Partitioning (GPT) is enabled @@ -550,7 +551,7 @@ and is between 256 and 4096 characters. It is defined in the file dtc3181e= [HW,SCSI] - earlyprintk= [X86-32,X86-64,SH] + earlyprintk= [X86-32,X86-64,SH,BLACKFIN] earlyprintk=vga earlyprintk=serial[,ttySn[,baudrate]] diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 73c5f1f3d5d..103e346c8b6 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -46,7 +46,7 @@ typedef uint32_t u32; typedef uint16_t u16; typedef uint8_t u8; #include "../../include/linux/lguest_launcher.h" -#include "../../include/asm-i386/e820.h" +#include "../../include/asm-x86/e820_32.h" /*:*/ #define PAGE_PRESENT 0x7 /* Present, RW, Execute */ diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt new file mode 100644 index 00000000000..4ba4664ce5c --- /dev/null +++ b/Documentation/lockstat.txt @@ -0,0 +1,120 @@ + +LOCK STATISTICS + +- WHAT + +As the name suggests, it provides statistics on locks. + +- WHY + +Because things like lock contention can severely impact performance. + +- HOW + +Lockdep already has hooks in the lock functions and maps lock instances to +lock classes. We build on that. The graph below shows the relation between +the lock functions and the various hooks therein. + + __acquire + | + lock _____ + | \ + | __contended + | | + | <wait> + | _______/ + |/ + | + __acquired + | + . + <hold> + . + | + __release + | + unlock + +lock, unlock - the regular lock functions +__* - the hooks +<> - states + +With these hooks we provide the following statistics: + + con-bounces - number of lock contention that involved x-cpu data + contentions - number of lock acquisitions that had to wait + wait time min - shortest (non-0) time we ever had to wait for a lock + max - longest time we ever had to wait for a lock + total - total time we spend waiting on this lock + acq-bounces - number of lock acquisitions that involved x-cpu data + acquisitions - number of times we took the lock + hold time min - shortest (non-0) time we ever held the lock + max - longest time we ever held the lock + total - total time this lock was held + +From these number various other statistics can be derived, such as: + + hold time average = hold time total / acquisitions + +These numbers are gathered per lock class, per read/write state (when +applicable). + +It also tracks 4 contention points per class. A contention point is a call site +that had to wait on lock acquisition. + + - USAGE + +Look at the current lock statistics: + +( line numbers not part of actual output, done for clarity in the explanation + below ) + +# less /proc/lock_stat + +01 lock_stat version 0.2 +02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total +04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +05 +06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 +07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 +08 -------------------------- +09 &inode->i_data.tree_lock 0 [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190 +10 +11 ............................................................................................................................................................................................... +12 +13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 +14 ----------- +15 dcache_lock 180 [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230 +16 dcache_lock 165 [<ffffffff802c002a>] d_alloc+0x15a/0x210 +17 dcache_lock 33 [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70 +18 dcache_lock 1 [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130 + +This excerpt shows the first two lock class statistics. Line 01 shows the +output version - each time the format changes this will be updated. Line 02-04 +show the header with column descriptions. Lines 05-10 and 13-18 show the actual +statistics. These statistics come in two parts; the actual stats separated by a +short separator (line 08, 14) from the contention points. + +The first lock (05-10) is a read/write lock, and shows two lines above the +short separator. The contention points don't match the column descriptors, +they have two: contentions and [<IP>] symbol. + + +View the top contending locks: + +# grep : /proc/lock_stat | head + &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 + &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 + dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 + &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 + &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 + &inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 + &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 + &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 + &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 + tasklist_lock-W: 15 15 1.45 10.87 32.70 1201 7390 0.58 62.55 13648.47 + +Clear the statistics: + +# echo 0 > /proc/lock_stat diff --git a/Documentation/networking/NAPI_HOWTO.txt b/Documentation/networking/NAPI_HOWTO.txt deleted file mode 100644 index 7907435a661..00000000000 --- a/Documentation/networking/NAPI_HOWTO.txt +++ /dev/null @@ -1,766 +0,0 @@ -HISTORY: -February 16/2002 -- revision 0.2.1: -COR typo corrected -February 10/2002 -- revision 0.2: -some spell checking ;-> -January 12/2002 -- revision 0.1 -This is still work in progress so may change. -To keep up to date please watch this space. - -Introduction to NAPI -==================== - -NAPI is a proven (www.cyberus.ca/~hadi/usenix-paper.tgz) technique -to improve network performance on Linux. For more details please -read that paper. -NAPI provides a "inherent mitigation" which is bound by system capacity -as can be seen from the following data collected by Robert on Gigabit -ethernet (e1000): - - Psize Ipps Tput Rxint Txint Done Ndone - --------------------------------------------------------------- - 60 890000 409362 17 27622 7 6823 - 128 758150 464364 21 9301 10 7738 - 256 445632 774646 42 15507 21 12906 - 512 232666 994445 241292 19147 241192 1062 - 1024 119061 1000003 872519 19258 872511 0 - 1440 85193 1000003 946576 19505 946569 0 - - -Legend: -"Ipps" stands for input packets per second. -"Tput" == packets out of total 1M that made it out. -"txint" == transmit completion interrupts seen -"Done" == The number of times that the poll() managed to pull all -packets out of the rx ring. Note from this that the lower the -load the more we could clean up the rxring -"Ndone" == is the converse of "Done". Note again, that the higher -the load the more times we couldn't clean up the rxring. - -Observe that: -when the NIC receives 890Kpackets/sec only 17 rx interrupts are generated. -The system cant handle the processing at 1 interrupt/packet at that load level. -At lower rates on the other hand, rx interrupts go up and therefore the -interrupt/packet ratio goes up (as observable from that table). So there is -possibility that under low enough input, you get one poll call for each -input packet caused by a single interrupt each time. And if the system -cant handle interrupt per packet ratio of 1, then it will just have to -chug along .... - - -0) Prerequisites: -================== -A driver MAY continue using the old 2.4 technique for interfacing -to the network stack and not benefit from the NAPI changes. -NAPI additions to the kernel do not break backward compatibility. -NAPI, however, requires the following features to be available: - -A) DMA ring or enough RAM to store packets in software devices. - -B) Ability to turn off interrupts or maybe events that send packets up -the stack. - -NAPI processes packet events in what is known as dev->poll() method. -Typically, only packet receive events are processed in dev->poll(). -The rest of the events MAY be processed by the regular interrupt handler -to reduce processing latency (justified also because there are not that -many of them). -Note, however, NAPI does not enforce that dev->poll() only processes -receive events. -Tests with the tulip driver indicated slightly increased latency if -all of the interrupt handler is moved to dev->poll(). Also MII handling -gets a little trickier. -The example used in this document is to move the receive processing only -to dev->poll(); this is shown with the patch for the tulip driver. -For an example of code that moves all the interrupt driver to -dev->poll() look at the ported e1000 code. - -There are caveats that might force you to go with moving everything to -dev->poll(). Different NICs work differently depending on their status/event -acknowledgement setup. -There are two types of event register ACK mechanisms. - I) what is known as Clear-on-read (COR). - when you read the status/event register, it clears everything! - The natsemi and sunbmac NICs are known to do this. - In this case your only choice is to move all to dev->poll() - - II) Clear-on-write (COW) - i) you clear the status by writing a 1 in the bit-location you want. - These are the majority of the NICs and work the best with NAPI. - Put only receive events in dev->poll(); leave the rest in - the old interrupt handler. - ii) whatever you write in the status register clears every thing ;-> - Cant seem to find any supported by Linux which do this. If - someone knows such a chip email us please. - Move all to dev->poll() - -C) Ability to detect new work correctly. -NAPI works by shutting down event interrupts when there's work and -turning them on when there's none. -New packets might show up in the small window while interrupts were being -re-enabled (refer to appendix 2). A packet might sneak in during the period -we are enabling interrupts. We only get to know about such a packet when the -next new packet arrives and generates an interrupt. -Essentially, there is a small window of opportunity for a race condition -which for clarity we'll refer to as the "rotting packet". - -This is a very important topic and appendix 2 is dedicated for more -discussion. - -Locking rules and environmental guarantees -========================================== - --Guarantee: Only one CPU at any time can call dev->poll(); this is because -only one CPU can pick the initial interrupt and hence the initial -netif_rx_schedule(dev); -- The core layer invokes devices to send packets in a round robin format. -This implies receive is totally lockless because of the guarantee that only -one CPU is executing it. -- contention can only be the result of some other CPU accessing the rx -ring. This happens only in close() and suspend() (when these methods -try to clean the rx ring); -****guarantee: driver authors need not worry about this; synchronization -is taken care for them by the top net layer. --local interrupts are enabled (if you dont move all to dev->poll()). For -example link/MII and txcomplete continue functioning just same old way. -This improves the latency of processing these events. It is also assumed that -the receive interrupt is the largest cause of noise. Note this might not -always be true. -[according to Manfred Spraul, the winbond insists on sending one -txmitcomplete interrupt for each packet (although this can be mitigated)]. -For these broken drivers, move all to dev->poll(). - -For the rest of this text, we'll assume that dev->poll() only -processes receive events. - -new methods introduce by NAPI -============================= - -a) netif_rx_schedule(dev) -Called by an IRQ handler to schedule a poll for device - -b) netif_rx_schedule_prep(dev) -puts the device in a state which allows for it to be added to the -CPU polling list if it is up and running. You can look at this as -the first half of netif_rx_schedule(dev) above; the second half -being c) below. - -c) __netif_rx_schedule(dev) -Add device to the poll list for this CPU; assuming that _prep above -has already been called and returned 1. - -d) netif_rx_reschedule(dev, undo) -Called to reschedule polling for device specifically for some -deficient hardware. Read Appendix 2 for more details. - -e) netif_rx_complete(dev) - -Remove interface from the CPU poll list: it must be in the poll list -on current cpu. This primitive is called by dev->poll(), when -it completes its work. The device cannot be out of poll list at this -call, if it is then clearly it is a BUG(). You'll know ;-> - -All of the above methods are used below, so keep reading for clarity. - -Device driver changes to be made when porting NAPI -================================================== - -Below we describe what kind of changes are required for NAPI to work. - -1) introduction of dev->poll() method -===================================== - -This is the method that is invoked by the network core when it requests -for new packets from the driver. A driver is allowed to send upto -dev->quota packets by the current CPU before yielding to the network -subsystem (so other devices can also get opportunity to send to the stack). - -dev->poll() prototype looks as follows: -int my_poll(struct net_device *dev, int *budget) - -budget is the remaining number of packets the network subsystem on the -current CPU can send up the stack before yielding to other system tasks. -*Each driver is responsible for decrementing budget by the total number of -packets sent. - Total number of packets cannot exceed dev->quota. - -dev->poll() method is invoked by the top layer, the driver just sends if it -can to the stack the packet quantity requested. - -more on dev->poll() below after the interrupt changes are explained. - -2) registering dev->poll() method -=================================== - -dev->poll should be set in the dev->probe() method. -e.g: -dev->open = my_open; -. -. -/* two new additions */ -/* first register my poll method */ -dev->poll = my_poll; -/* next register my weight/quanta; can be overridden in /proc */ -dev->weight = 16; -. -. -dev->stop = my_close; - - - -3) scheduling dev->poll() -============================= -This involves modifying the interrupt handler and the code -path which takes the packet off the NIC and sends them to the -stack. - -it's important at this point to introduce the classical D Becker -interrupt processor: - ------------------- -static irqreturn_t -netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - - struct net_device *dev = (struct net_device *)dev_instance; - struct my_private *tp = (struct my_private *)dev->priv; - - int work_count = my_work_count; - status = read_interrupt_status_reg(); - if (status == 0) - return IRQ_NONE; /* Shared IRQ: not us */ - if (status == 0xffff) - return IRQ_HANDLED; /* Hot unplug */ - if (status & error) - do_some_error_handling() - - do { - acknowledge_ints_ASAP(); - - if (status & link_interrupt) { - spin_lock(&tp->link_lock); - do_some_link_stat_stuff(); - spin_lock(&tp->link_lock); - } - - if (status & rx_interrupt) { - receive_packets(dev); - } - - if (status & rx_nobufs) { - make_rx_buffs_avail(); - } - - if (status & tx_related) { - spin_lock(&tp->lock); - tx_ring_free(dev); - if (tx_died) - restart_tx(); - spin_unlock(&tp->lock); - } - - status = read_interrupt_status_reg(); - - } while (!(status & error) || more_work_to_be_done); - return IRQ_HANDLED; -} - ----------------------------------------------------------------------- - -We now change this to what is shown below to NAPI-enable it: - ----------------------------------------------------------------------- -static irqreturn_t -netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct net_device *dev = (struct net_device *)dev_instance; - struct my_private *tp = (struct my_private *)dev->priv; - - status = read_interrupt_status_reg(); - if (status == 0) - return IRQ_NONE; /* Shared IRQ: not us */ - if (status == 0xffff) - return IRQ_HANDLED; /* Hot unplug */ - if (status & error) - do_some_error_handling(); - - do { -/************************ start note *********************************/ - acknowledge_ints_ASAP(); // dont ack rx and rxnobuff here -/************************ end note *********************************/ - - if (status & link_interrupt) { - spin_lock(&tp->link_lock); - do_some_link_stat_stuff(); - spin_unlock(&tp->link_lock); - } -/************************ start note *********************************/ - if (status & rx_interrupt || (status & rx_nobuffs)) { - if (netif_rx_schedule_prep(dev)) { - - /* disable interrupts caused - * by arriving packets */ - disable_rx_and_rxnobuff_ints(); - /* tell system we have work to be done. */ - __netif_rx_schedule(dev); - } else { - printk("driver bug! interrupt while in poll\n"); - /* FIX by disabling interrupts */ - disable_rx_and_rxnobuff_ints(); - } - } -/************************ end note note *********************************/ - - if (status & tx_related) { - spin_lock(&tp->lock); - tx_ring_free(dev); - - if (tx_died) - restart_tx(); - spin_unlock(&tp->lock); - } - - status = read_interrupt_status_reg(); - -/************************ start note *********************************/ - } while (!(status & error) || more_work_to_be_done(status)); -/************************ end note note *********************************/ - return IRQ_HANDLED; -} - ---------------------------------------------------------------------- - - -We note several things from above: - -I) Any interrupt source which is caused by arriving packets is now -turned off when it occurs. Depending on the hardware, there could be -several reasons that arriving packets would cause interrupts; these are the -interrupt sources we wish to avoid. The two common ones are a) a packet -arriving (rxint) b) a packet arriving and finding no DMA buffers available -(rxnobuff) . -This means also acknowledge_ints_ASAP() will not clear the status -register for those two items above; clearing is done in the place where -proper work is done within NAPI; at the poll() and refill_rx_ring() -discussed further below. -netif_rx_schedule_prep() returns 1 if device is in running state and -gets successfully added to the core poll list. If we get a zero value -we can _almost_ assume are already added to the list (instead of not running. -Logic based on the fact that you shouldn't get interrupt if not running) -We rectify this by disabling rx and rxnobuf interrupts. - -II) that receive_packets(dev) and make_rx_buffs_avail() may have disappeared. -These functionalities are still around actually...... - -infact, receive_packets(dev) is very close to my_poll() and -make_rx_buffs_avail() is invoked from my_poll() - -4) converting receive_packets() to dev->poll() -=============================================== - -We need to convert the classical D Becker receive_packets(dev) to my_poll() - -First the typical receive_packets() below: -------------------------------------------------------------------- - -/* this is called by interrupt handler */ -static void receive_packets (struct net_device *dev) -{ - - struct my_private *tp = (struct my_private *)dev->priv; - rx_ring = tp->rx_ring; - cur_rx = tp->cur_rx; - int entry = cur_rx % RX_RING_SIZE; - int received = 0; - int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx; - - while (rx_ring_not_empty) { - u32 rx_status; - unsigned int rx_size; - unsigned int pkt_size; - struct sk_buff *skb; - /* read size+status of next frame from DMA ring buffer */ - /* the number 16 and 4 are just examples */ - rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset)); - rx_size = rx_status >> 16; - pkt_size = rx_size - 4; - - /* process errors */ - if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) || - (!(rx_status & RxStatusOK))) { - netdrv_rx_err (rx_status, dev, tp, ioaddr); - return; - } - - if (--rx_work_limit < 0) - break; - - /* grab a skb */ - skb = dev_alloc_skb (pkt_size + 2); - if (skb) { - . - . - netif_rx (skb); - . - . - } else { /* OOM */ - /*seems very driver specific ... some just pass - whatever is on the ring already. */ - } - - /* move to the next skb on the ring */ - entry = (++tp->cur_rx) % RX_RING_SIZE; - received++ ; - - } - - /* store current ring pointer state */ - tp->cur_rx = cur_rx; - - /* Refill the Rx ring buffers if they are needed */ - refill_rx_ring(); - . - . - -} -------------------------------------------------------------------- -We change it to a new one below; note the additional parameter in -the call. - -------------------------------------------------------------------- - -/* this is called by the network core */ |