From 291bc047e125ff02c9affe06a7df28bed57b054d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Mar 2007 11:21:08 +0200 Subject: [PATCH] clockevents: remove bad designed sysfs support for now The current sysfs support of clockevents does not obey the "only one value per file" rule. The real fix is not 2.6.21 material. Therefor remove the sysfs support for now. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- kernel/time/clockevents.c | 69 ----------------------------------------------- 1 file changed, 69 deletions(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 67932ea78c1..76212b2a99d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -274,72 +274,3 @@ void clockevents_notify(unsigned long reason, void *arg) } EXPORT_SYMBOL_GPL(clockevents_notify); -#ifdef CONFIG_SYSFS - -/** - * clockevents_show_registered - sysfs interface for listing clockevents - * @dev: unused - * @buf: char buffer to be filled with clock events list - * - * Provides sysfs interface for listing registered clock event devices - */ -static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf) -{ - struct list_head *tmp; - char *p = buf; - int cpu; - - spin_lock(&clockevents_lock); - - list_for_each(tmp, &clockevent_devices) { - struct clock_event_device *ce; - - ce = list_entry(tmp, struct clock_event_device, list); - p += sprintf(p, "%-20s F:%04x M:%d", ce->name, - ce->features, ce->mode); - p += sprintf(p, " C:"); - if (!cpus_equal(ce->cpumask, cpu_possible_map)) { - for_each_cpu_mask(cpu, ce->cpumask) - p += sprintf(p, " %d", cpu); - } else { - /* - * FIXME: Add the cpu which is handling this sucker - */ - } - p += sprintf(p, "\n"); - } - - spin_unlock(&clockevents_lock); - - return p - buf; -} - -/* - * Sysfs setup bits: - */ -static SYSDEV_ATTR(registered, 0600, - clockevents_show_registered, NULL); - -static struct sysdev_class clockevents_sysclass = { - set_kset_name("clockevents"), -}; - -static struct sys_device clockevents_sys_device = { - .id = 0, - .cls = &clockevents_sysclass, -}; - -static int __init clockevents_sysfs_init(void) -{ - int error = sysdev_class_register(&clockevents_sysclass); - - if (!error) - error = sysdev_register(&clockevents_sys_device); - if (!error) - error = sysdev_create_file( - &clockevents_sys_device, - &attr_registered); - return error; -} -device_initcall(clockevents_sysfs_init); -#endif -- cgit v1.2.3-70-g09d2 From d62ac21aa075c8ddf3d02a98d28afce635e77e8e Mon Sep 17 00:00:00 2001 From: john stultz Date: Mon, 26 Mar 2007 21:32:26 -0800 Subject: [PATCH] ntp: avoid time_offset overflows I've been seeing some odd NTP behavior recently on a few boxes and finally narrowed it down to time_offset overflowing when converted to SHIFT_UPDATE units (which was a side effect from my HZfreeNTP patch). This patch converts time_offset from a long to a s64 which resolves the issue. [tglx@linutronix.de: signedness fixes] Signed-off-by: John Stultz Cc: Roman Zippel Cc: john stultz Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/ntp.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index eb12509e00b..cb25649c6f5 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -32,7 +32,7 @@ static u64 tick_length, tick_length_base; /* TIME_ERROR prevents overwriting the CMOS clock */ static int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ -static long time_offset; /* time adjustment (ns) */ +static s64 time_offset; /* time adjustment (ns) */ static long time_constant = 2; /* pll time constant */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ @@ -196,7 +196,7 @@ void __attribute__ ((weak)) notify_arch_cmos_timer(void) */ int do_adjtimex(struct timex *txc) { - long ltemp, mtemp, save_adjust; + long mtemp, save_adjust, rem; s64 freq_adj, temp64; int result; @@ -277,14 +277,14 @@ int do_adjtimex(struct timex *txc) time_adjust = txc->offset; } else if (time_status & STA_PLL) { - ltemp = txc->offset * NSEC_PER_USEC; + time_offset = txc->offset * NSEC_PER_USEC; /* * Scale the phase adjustment and * clamp to the operating range. */ - time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC); - time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC); + time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC); + time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC); /* * Select whether the frequency is to be controlled @@ -297,11 +297,11 @@ int do_adjtimex(struct timex *txc) mtemp = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; - freq_adj = (s64)time_offset * mtemp; + freq_adj = time_offset * mtemp; freq_adj = shift_right(freq_adj, time_constant * 2 + (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { - temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL); + temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL); if (time_offset < 0) { temp64 = -temp64; do_div(temp64, mtemp); @@ -314,8 +314,10 @@ int do_adjtimex(struct timex *txc) freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = (time_offset / NTP_INTERVAL_FREQ) - << SHIFT_UPDATE; + time_offset = div_long_long_rem_signed(time_offset, + NTP_INTERVAL_FREQ, + &rem); + time_offset <<= SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) @@ -328,12 +330,12 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) result = TIME_ERROR; if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - txc->offset = save_adjust; + txc->offset = save_adjust; else - txc->offset = shift_right(time_offset, SHIFT_UPDATE) - * NTP_INTERVAL_FREQ / 1000; - txc->freq = (time_freq / NSEC_PER_USEC) - << (SHIFT_USEC - SHIFT_NSEC); + txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) * + NTP_INTERVAL_FREQ / 1000; + txc->freq = (time_freq / NSEC_PER_USEC) << + (SHIFT_USEC - SHIFT_NSEC); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; -- cgit v1.2.3-70-g09d2 From 436ce71638eceb0f9dd7608157807c37b29c3db7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 27 Mar 2007 12:09:13 +0200 Subject: [PATCH] Revert "swsusp: disable nonboot CPUs before entering platform suspend" This reverts commit 94985134b7b46848267ed6b734320db01c974e72 and insteads removes the WARN_ON() that caused that commit in the first place. The problem is that we call disable_nonboot_cpus() in swsusp before powering down the system in order to avoid triggering the WARN_ON() in arch/x86_64/kernel/acpi/sleep.c:init_low_mapping() and this doesn't work well on Thomas' system. So instead, remove the WARN_ON() in arch/x86_64/kernel/acpi/sleep.c: init_low_mapping(), which triggers every time during the suspend to disk in the platform mode, as the potential problem it is related to doesn't seem to occur in practice. [ I think we might want to disallow the case of multiple users of that mm, or something. Normally, playing with the current process page tables on the current CPU should be fine as long as we don't have other threads using those tables at the same time.. Anyway, not pretty, but better than the warning or the lockup - Linus ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/acpi/sleep.c | 4 +++- kernel/power/disk.c | 1 - kernel/power/user.c | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c index 23178ce6c78..e1548fbe95a 100644 --- a/arch/x86_64/kernel/acpi/sleep.c +++ b/arch/x86_64/kernel/acpi/sleep.c @@ -66,8 +66,10 @@ static void init_low_mapping(void) { pgd_t *slot0 = pgd_offset(current->mm, 0UL); low_ptr = *slot0; + /* FIXME: We're playing with the current task's page tables here, which + * is potentially dangerous on SMP systems. + */ set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); - WARN_ON(num_online_cpus() != 1); local_flush_tlb(); } diff --git a/kernel/power/disk.c b/kernel/power/disk.c index dee0ff40bef..aec19b063e3 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -58,7 +58,6 @@ static inline int platform_prepare(void) static void power_down(suspend_disk_method_t mode) { - disable_nonboot_cpus(); switch(mode) { case PM_DISK_PLATFORM: if (pm_ops && pm_ops->enter) { diff --git a/kernel/power/user.c b/kernel/power/user.c index bf211fee122..7cf6713b232 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -401,10 +401,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, case PMOPS_ENTER: if (data->platform_suspend) { - disable_nonboot_cpus(); kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); error = pm_ops->enter(PM_SUSPEND_DISK); - enable_nonboot_cpus(); + error = 0; } break; -- cgit v1.2.3-70-g09d2 From 935c631db827cc3a96df4dcc6fec374b994fdbd1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Mar 2007 13:17:18 +0200 Subject: [PATCH] hrtimers: fix reprogramming SMP race hrtimer_start() incorrectly set the 'reprogram' flag to enqueue_hrtimer(), which should only be 1 if the hrtimer is queued to the current CPU. Doing otherwise could result in a reprogramming of the current CPU's clockevents device, with a timer that is not queued to it - resulting in a bogus next expiry value. Signed-off-by: Ingo Molnar Cc: Michal Piotrowski Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6a7938a0d51..067ba2c0532 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -814,7 +814,12 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) timer_stats_hrtimer_set_start_info(timer); - enqueue_hrtimer(timer, new_base, base == new_base); + /* + * Only allow reprogramming if the new base is on this CPU. + * (it might still be on another CPU if the timer was pending) + */ + enqueue_hrtimer(timer, new_base, + new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); unlock_hrtimer_base(timer, &flags); -- cgit v1.2.3-70-g09d2 From 14e9d5730adfca26452b3a2838a80af6950556f5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 28 Mar 2007 23:38:16 -0600 Subject: [PATCH] pid: Properly detect orphaned process groups in exit_notify In commit 0475ac0845f9295bc5f69af45f58dff2c104c8d1 when converting the orphaned process group handling to use struct pid I made a small mistake. I accidentally replaced an == with a !=. Besides just being a dumb thing to do apparently this has a bad side effect. The improper orphaned process group detection causes kwin to die after a suspend/resume cycle. I'm amazed this patch has been around as long as it has without anyone else noticing something funny going on. And the following people deserve credit for spotting and helping to reproduce this. Thanks to: Sid Boyce Thanks to: "Michael Wu" Signed-off-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index f132349c032..b55ed4cc910 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -790,7 +790,7 @@ static void exit_notify(struct task_struct *tsk) pgrp = task_pgrp(tsk); if ((task_pgrp(t) != pgrp) && - (task_session(t) != task_session(tsk)) && + (task_session(t) == task_session(tsk)) && will_become_orphaned_pgrp(pgrp, tsk) && has_stopped_jobs(pgrp)) { __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); -- cgit v1.2.3-70-g09d2 From 0c84ce268b69855919b6ac7edc8f11caf21e9c88 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 1 Apr 2007 23:49:48 -0700 Subject: [PATCH] driver core: fix built-in drivers sysfs links built-in drivers had broken sysfs links that caused bootup hangs for certain driver unregistry sequences. Signed-off-by: Ingo Molnar Acked-by: Kay Sievers Signed-off-by: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/device.h | 1 + kernel/module.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/device.h b/include/linux/device.h index caad9bba965..5cf30e95c8b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -128,6 +128,7 @@ struct device_driver { struct module * owner; const char * mod_name; /* used for built-in modules */ + struct module_kobject * mkobj; int (*probe) (struct device * dev); int (*remove) (struct device * dev); diff --git a/kernel/module.c b/kernel/module.c index fbc51de6444..dcdb32b8b13 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2384,8 +2384,13 @@ void module_add_driver(struct module *mod, struct device_driver *drv) /* Lookup built-in module entry in /sys/modules */ mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name); - if (mkobj) + if (mkobj) { mk = container_of(mkobj, struct module_kobject, kobj); + /* remember our module structure */ + drv->mkobj = mk; + /* kset_find_obj took a reference */ + kobject_put(mkobj); + } } if (!mk) @@ -2405,17 +2410,22 @@ EXPORT_SYMBOL(module_add_driver); void module_remove_driver(struct device_driver *drv) { + struct module_kobject *mk = NULL; char *driver_name; if (!drv) return; sysfs_remove_link(&drv->kobj, "module"); - if (drv->owner && drv->owner->mkobj.drivers_dir) { + + if (drv->owner) + mk = &drv->owner->mkobj; + else if (drv->mkobj) + mk = drv->mkobj; + if (mk && mk->drivers_dir) { driver_name = make_driver_name(drv); if (driver_name) { - sysfs_remove_link(drv->owner->mkobj.drivers_dir, - driver_name); + sysfs_remove_link(mk->drivers_dir, driver_name); kfree(driver_name); } } -- cgit v1.2.3-70-g09d2 From 1d64b9cb1dc2a7cd521444e3d908adeccd026356 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 1 Apr 2007 23:49:49 -0700 Subject: [PATCH] Fix microcode-related suspend problem Fix the regression resulting from the recent change of suspend code ordering that causes systems based on Intel x86 CPUs using the microcode driver to hang during the resume. The problem occurs since the microcode driver uses request_firmware() in its CPU hotplug notifier, which is called after tasks has been frozen and hangs. It can be fixed by telling the microcode driver to use the microcode stored in memory during the resume instead of trying to load it from disk. Signed-off-by: Rafael J. Wysocki Adrian Bunk Cc: Tigran Aivazian Cc: Pavel Machek Cc: Maxim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/microcode.c | 71 +++++++++++++++++++++++++++++++++++++++++--- include/linux/cpu.h | 4 +++ kernel/cpu.c | 32 ++++++++++---------- 3 files changed, 87 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index b8f16633a6e..cbe7ec8dbb9 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -567,6 +567,53 @@ static int cpu_request_microcode(int cpu) return error; } +static int apply_microcode_on_cpu(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + cpumask_t old; + unsigned int val[2]; + int err = 0; + + if (!uci->mc) + return -EINVAL; + + old = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) + err = -EINVAL; + + if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + if (uci->pf != (1 << ((val[1] >> 18) & 7))) + err = -EINVAL; + } + + if (!err) { + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (uci->rev != val[1]) + err = -EINVAL; + } + + if (!err) + apply_microcode(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " sig=0x%x, pf=0x%x, rev=0x%x\n", + cpu, uci->sig, uci->pf, uci->rev); + + set_cpus_allowed(current, old); + return err; +} + static void microcode_init_cpu(int cpu) { cpumask_t old; @@ -577,7 +624,8 @@ static void microcode_init_cpu(int cpu) set_cpus_allowed(current, cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING) + if (uci->valid && system_state == SYSTEM_RUNNING && + !suspend_cpu_hotplug) cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); set_cpus_allowed(current, old); @@ -663,13 +711,24 @@ static int mc_sysdev_add(struct sys_device *sys_dev) return 0; pr_debug("Microcode:CPU %d added\n", cpu); - memset(uci, 0, sizeof(*uci)); + /* If suspend_cpu_hotplug is set, the system is resuming and we should + * use the data from before the suspend. + */ + if (suspend_cpu_hotplug) { + err = apply_microcode_on_cpu(cpu); + if (err) + microcode_fini_cpu(cpu); + } + if (!uci->valid) + memset(uci, 0, sizeof(*uci)); err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); if (err) return err; - microcode_init_cpu(cpu); + if (!uci->valid) + microcode_init_cpu(cpu); + return 0; } @@ -680,7 +739,11 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; pr_debug("Microcode:CPU %d removed\n", cpu); - microcode_fini_cpu(cpu); + /* If suspend_cpu_hotplug is set, the system is suspending and we should + * keep the microcode in memory for the resume. + */ + if (!suspend_cpu_hotplug) + microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 769ddc6df49..c22b0dfcbcd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -127,9 +127,13 @@ static inline int cpu_is_offline(int cpu) { return 0; } #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_SUSPEND_SMP +extern int suspend_cpu_hotplug; + extern int disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); #else +#define suspend_cpu_hotplug 0 + static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 3d4206ada5c..36e70845cfc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -254,6 +254,12 @@ int __cpuinit cpu_up(unsigned int cpu) } #ifdef CONFIG_SUSPEND_SMP +/* Needed to prevent the microcode driver from requesting firmware in its CPU + * hotplug notifier during the suspend/resume. + */ +int suspend_cpu_hotplug; +EXPORT_SYMBOL(suspend_cpu_hotplug); + static cpumask_t frozen_cpus; int disable_nonboot_cpus(void) @@ -261,16 +267,8 @@ int disable_nonboot_cpus(void) int cpu, first_cpu, error = 0; mutex_lock(&cpu_add_remove_lock); - first_cpu = first_cpu(cpu_present_map); - if (!cpu_online(first_cpu)) { - error = _cpu_up(first_cpu); - if (error) { - printk(KERN_ERR "Could not bring CPU%d up.\n", - first_cpu); - goto out; - } - } - + suspend_cpu_hotplug = 1; + first_cpu = first_cpu(cpu_online_map); /* We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ @@ -296,7 +294,7 @@ int disable_nonboot_cpus(void) } else { printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } -out: + suspend_cpu_hotplug = 0; mutex_unlock(&cpu_add_remove_lock); return error; } @@ -308,20 +306,22 @@ void enable_nonboot_cpus(void) /* Allow everyone to use the CPU hotplug again */ mutex_lock(&cpu_add_remove_lock); cpu_hotplug_disabled = 0; - mutex_unlock(&cpu_add_remove_lock); if (cpus_empty(frozen_cpus)) - return; + goto out; + suspend_cpu_hotplug = 1; printk("Enabling non-boot CPUs ...\n"); for_each_cpu_mask(cpu, frozen_cpus) { - error = cpu_up(cpu); + error = _cpu_up(cpu); if (!error) { printk("CPU%d is up\n", cpu); continue; } - printk(KERN_WARNING "Error taking CPU%d up: %d\n", - cpu, error); + printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); } cpus_clear(frozen_cpus); + suspend_cpu_hotplug = 0; +out: + mutex_unlock(&cpu_add_remove_lock); } #endif -- cgit v1.2.3-70-g09d2 From 456a09dce9ca9b0013cabcda918aee851a04471d Mon Sep 17 00:00:00 2001 From: Thomas Bittermann Date: Wed, 4 Apr 2007 22:20:54 +0200 Subject: [PATCH] kernel/time.c: add missing symbol exports This patch adds 2 missing symbol exports: jiffies_to_timeval() and timeval_to_jiffies(). The (not yet merged) dm-raid4-5 module will need them, and they used to be indirectly exported by virtue of being inline functions. Commit 8b9365d753d9870bb6451504c13570b81923228f ("[PATCH] Uninline jiffies.h functions") uninlined them, and thus modules now need them explicitly exported to use them. Signed-off-by: Thomas Bittermann Acked-by: Andrew Morton Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: john stultz Signed-off-by: Linus Torvalds --- kernel/time.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index c6c80ea5d0e..2f47888e46c 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -635,6 +635,7 @@ timeval_to_jiffies(const struct timeval *value) (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } +EXPORT_SYMBOL(timeval_to_jiffies); void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) { @@ -649,6 +650,7 @@ void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) tv_usec /= NSEC_PER_USEC; value->tv_usec = tv_usec; } +EXPORT_SYMBOL(jiffies_to_timeval); /* * Convert jiffies/jiffies_64 to clock_t and back. -- cgit v1.2.3-70-g09d2 From c75fd0ee6e1750e6e527ed1d4aeee66739d9ad79 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Apr 2007 19:08:21 -0700 Subject: [PATCH] swsusp: fix memory shrinker Fix a bug in the swsusp's memory shrinker that causes some systems using highmem to refuse to suspend to disk if image_size is set above 1/2 of available RAM. Special thanks to Jiri Slaby for reporting the problem and assistance in debugging it. Signed-off-by: Rafael J. Wysocki Cc: Jiri Slaby Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 7fb834397a0..175370824f3 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -229,13 +229,13 @@ int swsusp_shrink_memory(void) size += highmem_size; for_each_zone (zone) if (populated_zone(zone)) { + tmp += snapshot_additional_pages(zone); if (is_highmem(zone)) { highmem_size -= zone_page_state(zone, NR_FREE_PAGES); } else { tmp -= zone_page_state(zone, NR_FREE_PAGES); tmp += zone->lowmem_reserve[ZONE_NORMAL]; - tmp += snapshot_additional_pages(zone); } } -- cgit v1.2.3-70-g09d2 From 98de9e3ba23422b5c45b91c93aec1cb1e17514dc Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 4 Apr 2007 19:08:24 -0700 Subject: [PATCH] fix jiffies clocksource inittime In debugging a problem w/ the -rt tree, I noticed that on systems that mark the tsc as unstable before it is registered, the TSC would still be selected and used for a short period of time. Digging in it looks to be a result of the mix of the clocksource list changes and my clocksource initialization changes. With the -rt tree, using a bad TSC, even for a short period of time can results in a hang at boot. I was not able to reproduce this hang w/ mainline, but I'm not completely certain that someone won't trip on it. This patch resolves the issue by initializing the jiffies clocksource earlier so a bad TSC won't get selected just because nothing else is yet registered. Signed-off-by: John Stultz Acked-by: Thomas Gleixner Cc: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/jiffies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 3be8da8fed7..4c256fdb887 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -69,4 +69,4 @@ static int __init init_jiffies_clocksource(void) return clocksource_register(&clocksource_jiffies); } -module_init(init_jiffies_clocksource); +core_initcall(init_jiffies_clocksource); -- cgit v1.2.3-70-g09d2 From 995f054f2a342f8505fed4f8395d12c0f5966414 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 7 Apr 2007 12:05:00 +0200 Subject: [PATCH] high-res timers: resume fix Soeren Sonnenburg reported that upon resume he is getting this backtrace: [] smp_apic_timer_interrupt+0x57/0x90 [] retrigger_next_event+0x0/0xb0 [] apic_timer_interrupt+0x28/0x30 [] retrigger_next_event+0x0/0xb0 [] __kfifo_put+0x8/0x90 [] on_each_cpu+0x35/0x60 [] clock_was_set+0x18/0x20 [] timekeeping_resume+0x7c/0xa0 [] __sysdev_resume+0x11/0x80 [] sysdev_resume+0x47/0x80 [] device_power_up+0x5/0x10 it turns out that on resume we mistakenly re-enable interrupts too early. Do the timer retrigger only on the current CPU. Signed-off-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: Soeren Sonnenburg Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 3 +++ kernel/hrtimer.c | 12 ++++++++++++ kernel/timer.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5bdbc744e77..17c29dca835 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -206,6 +206,7 @@ struct hrtimer_cpu_base { struct clock_event_device; extern void clock_was_set(void); +extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); /* @@ -236,6 +237,8 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) */ static inline void clock_was_set(void) { } +static inline void hres_timers_resume(void) { } + /* * In non high resolution mode the time reference is taken from * the base softirq time variable. diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 067ba2c0532..b74860aaf5f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -458,6 +458,18 @@ void clock_was_set(void) on_each_cpu(retrigger_next_event, NULL, 0, 1); } +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hres_timers_resume(void) +{ + WARN_ON_ONCE(num_online_cpus() > 1); + + /* Retrigger the CPU local events: */ + retrigger_next_event(NULL); +} + /* * Check, whether the timer is on the callback pending list */ diff --git a/kernel/timer.c b/kernel/timer.c index 440048acaea..dd6c2c1c561 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1016,7 +1016,7 @@ static int timekeeping_resume(struct sys_device *dev) clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); /* Resume hrtimers */ - clock_was_set(); + hres_timers_resume(); return 0; } -- cgit v1.2.3-70-g09d2 From 7f30e49ee1c2c1e95ce9d2ccce5221ddb793dd60 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Apr 2007 14:59:41 +0900 Subject: [PATCH] irq-devres: fix failure path of devm_request_irq() devres should be deallocated with devres_free() not kfree(). This bug corrupts slab on IRQ request failure. Fix it. Signed-off-by: Tejun Heo Cc: Andrew Morton Cc: Greg KH Signed-off-by: Linus Torvalds --- kernel/irq/devres.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 85a430da0fb..d8ee241115f 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -54,7 +54,7 @@ int devm_request_irq(struct device *dev, unsigned int irq, rc = request_irq(irq, handler, irqflags, devname, dev_id); if (rc) { - kfree(dr); + devres_free(dr); return rc; } -- cgit v1.2.3-70-g09d2 From 35f6f753b79705bc4b62da5dcc218d75ffa88370 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 6 Apr 2007 21:18:06 +0200 Subject: [PATCH] sched: get rid of p->children use in show_task() the p->parent PID printout gives us all the information about the task tree that we need - the eldest_child()/older_sibling()/ younger_sibling() printouts are mostly historic and i do not remember ever having used those fields. (IMO in fact they confuse the SysRq-T output.) So remove them. This code has sentimental value though, those fields and printouts are one of the oldest ones still surviving from Linux v0.95's kernel/sched.c: if (p->p_ysptr || p->p_osptr) printk(" Younger sib=%d, older sib=%d\n\r", p->p_ysptr ? p->p_ysptr->pid : -1, p->p_osptr ? p->p_osptr->pid : -1); else printk("\n\r"); written 15 years ago, in early 1992. Signed-off-by: Ingo Molnar Signed-off-by: Linus 'snif' Torvalds --- kernel/sched.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index a4ca632c477..cdad3b04242 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4687,27 +4687,6 @@ out_unlock: return retval; } -static inline struct task_struct *eldest_child(struct task_struct *p) -{ - if (list_empty(&p->children)) - return NULL; - return list_entry(p->children.next,struct task_struct,sibling); -} - -static inline struct task_struct *older_sibling(struct task_struct *p) -{ - if (p->sibling.prev==&p->parent->children) - return NULL; - return list_entry(p->sibling.prev,struct task_struct,sibling); -} - -static inline struct task_struct *younger_sibling(struct task_struct *p) -{ - if (p->sibling.next==&p->parent->children) - return NULL; - return list_entry(p->sibling.next,struct task_struct,sibling); -} - static const char stat_nam[] = "RSDTtZX"; static void show_task(struct task_struct *p) @@ -4738,19 +4717,7 @@ static void show_task(struct task_struct *p) free = (unsigned long)n - (unsigned long)end_of_stack(p); } #endif - printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); - if ((relative = eldest_child(p))) - printk("%5d ", relative->pid); - else - printk(" "); - if ((relative = younger_sibling(p))) - printk("%7d", relative->pid); - else - printk(" "); - if ((relative = older_sibling(p))) - printk(" %5d", relative->pid); - else - printk(" "); + printk("%5lu %5d %6d", free, p->pid, p->parent->pid); if (!p->mm) printk(" (L-TLB)\n"); else -- cgit v1.2.3-70-g09d2 From d354d2f4a6fc1b722c2e464a8b3cfd2f6afb304b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 7 Apr 2007 10:18:33 -0700 Subject: sched.c: Remove unused variable 'relative' Getting rid of the p->children printout in show_task() left behind an unused variable. Signed-off-by: Linus Torvalds --- kernel/sched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index cdad3b04242..b9a68373014 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4691,7 +4691,6 @@ static const char stat_nam[] = "RSDTtZX"; static void show_task(struct task_struct *p) { - struct task_struct *relative; unsigned long free = 0; unsigned state; -- cgit v1.2.3-70-g09d2 From fe20e581a72979917e35d5146458ceba79be391f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Apr 2007 23:28:41 -0700 Subject: [PATCH] fix kernel oops with badly formatted module option Catch malformed kernel parameter usage of "param = value". Spaces are not supported, but don't cause a kernel fault on such usage, just report an error. Signed-off-by: Randy Dunlap Acked-by: Larry Finger Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index e265b13195b..1fc4ac746cd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -356,6 +356,10 @@ int param_set_copystring(const char *val, struct kernel_param *kp) { struct kparam_string *kps = kp->arg; + if (!val) { + printk(KERN_ERR "%s: missing param set value\n", kp->name); + return -EINVAL; + } if (strlen(val)+1 > kps->maxlen) { printk(KERN_ERR "%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); -- cgit v1.2.3-70-g09d2 From 91fcd412e957f433e9f1abeb0b1926dbeb66ca80 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Mon, 23 Apr 2007 14:41:14 -0700 Subject: Allow reading tainted flag as user The commit 34f5a39899f3f3e815da64f48ddb72942d86c366 restricted reading of the tainted value. The attached patch changes this back to a write-only check and restores the read behaviour of older versions. Signed-off-by: Bastian Blank Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1b255df4fcd..c904748f229 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1676,7 +1676,7 @@ static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp, { int op; - if (!capable(CAP_SYS_ADMIN)) + if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; op = OP_OR; -- cgit v1.2.3-70-g09d2 From b7aa0bf70c4afb9e38be25f5c0922498d0f8684c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Apr 2007 16:16:32 -0700 Subject: [NET]: convert network timestamps to ktime_t We currently use a special structure (struct skb_timeval) and plain 'struct timeval' to store packet timestamps in sk_buffs and struct sock. This has some drawbacks : - Fixed resolution of micro second. - Waste of space on 64bit platforms where sizeof(struct timeval)=16 I suggest using ktime_t that is a nice abstraction of high resolution time services, currently capable of nanosecond resolution. As sizeof(ktime_t) is 8 bytes, using ktime_t in 'struct sock' permits a 8 byte shrink of this structure on 64bit architectures. Some other structures also benefit from this size reduction (struct ipq in ipv4/ip_fragment.c, struct frag_queue in ipv6/reassembly.c, ...) Once this ktime infrastructure adopted, we can more easily provide nanosecond resolution on top of it. (ioctl SIOCGSTAMPNS and/or SO_TIMESTAMPNS/SCM_TIMESTAMPNS) Note : this patch includes a bug correction in compat_sock_get_timestamp() where a "err = 0;" was missing (so this syscall returned -ENOENT instead of 0) Signed-off-by: Eric Dumazet CC: Stephen Hemminger CC: John find Signed-off-by: David S. Miller --- include/linux/skbuff.h | 26 +++++--------------------- include/net/sock.h | 18 +++++++++--------- kernel/time.c | 1 + net/bridge/netfilter/ebt_ulog.c | 6 ++++-- net/compat.c | 15 ++++++++++----- net/core/dev.c | 19 ++++--------------- net/core/sock.c | 16 +++++++++------- net/econet/af_econet.c | 2 +- net/ipv4/ip_fragment.c | 8 ++++---- net/ipv4/netfilter/ip_queue.c | 6 ++++-- net/ipv4/netfilter/ipt_ULOG.c | 8 +++++--- net/ipv6/exthdrs.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 6 ++++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- net/ipv6/reassembly.c | 6 +++--- net/ipx/af_ipx.c | 4 ++-- net/netfilter/nfnetlink_log.c | 8 ++++---- net/netfilter/nfnetlink_queue.c | 8 ++++---- net/packet/af_packet.c | 8 +++++--- net/sunrpc/svcsock.c | 10 +++------- 20 files changed, 85 insertions(+), 98 deletions(-) (limited to 'kernel') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5992f65b418..f9441b5f8d1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -27,6 +27,7 @@ #include #include #include +#include #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ @@ -156,11 +157,6 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) -struct skb_timeval { - u32 off_sec; - u32 off_usec; -}; - enum { SKB_FCLONE_UNAVAILABLE, @@ -233,7 +229,7 @@ struct sk_buff { struct sk_buff *prev; struct sock *sk; - struct skb_timeval tstamp; + ktime_t tstamp; struct net_device *dev; int iif; /* 4 byte hole on 64 bit*/ @@ -1365,26 +1361,14 @@ extern void skb_add_mtu(int mtu); */ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp) { - stamp->tv_sec = skb->tstamp.off_sec; - stamp->tv_usec = skb->tstamp.off_usec; + *stamp = ktime_to_timeval(skb->tstamp); } -/** - * skb_set_timestamp - set timestamp of a skb - * @skb: skb to set stamp of - * @stamp: pointer to struct timeval to get stamp from - * - * Timestamps are stored in the skb as offsets to a base timestamp. - * This function converts a struct timeval to an offset and stores - * it in the skb. - */ -static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) +static inline void __net_timestamp(struct sk_buff *skb) { - skb->tstamp.off_sec = stamp->tv_sec; - skb->tstamp.off_usec = stamp->tv_usec; + skb->tstamp = ktime_get_real(); } -extern void __net_timestamp(struct sk_buff *skb); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index a3366c3c837..9583639090d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -244,7 +244,7 @@ struct sock { struct sk_filter *sk_filter; void *sk_protinfo; struct timer_list sk_timer; - struct timeval sk_stamp; + ktime_t sk_stamp; struct socket *sk_socket; void *sk_user_data; struct page *sk_sndmsg_page; @@ -1307,19 +1307,19 @@ static inline int sock_intr_errno(long timeo) static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - struct timeval stamp; + ktime_t kt = skb->tstamp; - skb_get_timestamp(skb, &stamp); if (sock_flag(sk, SOCK_RCVTSTAMP)) { + struct timeval tv; /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ - if (stamp.tv_sec == 0) - do_gettimeofday(&stamp); - skb_set_timestamp(skb, &stamp); - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), - &stamp); + if (kt.tv64 == 0) + kt = ktime_get_real(); + skb->tstamp = kt; + tv = ktime_to_timeval(kt); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(tv), &tv); } else - sk->sk_stamp = stamp; + sk->sk_stamp = kt; } /** diff --git a/kernel/time.c b/kernel/time.c index 2f47888e46c..a1439f421d0 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -469,6 +469,7 @@ struct timeval ns_to_timeval(const s64 nsec) return tv; } +EXPORT_SYMBOL(ns_to_timeval); /* * Convert jiffies to milliseconds and back. diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 8e15cc47f6c..259f5c370f3 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -130,6 +130,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, unsigned int group = uloginfo->nlgroup; ebt_ulog_buff_t *ub = &ulog_buffers[group]; spinlock_t *lock = &ub->lock; + ktime_t kt; if ((uloginfo->cprange == 0) || (uloginfo->cprange > skb->len + ETH_HLEN)) @@ -164,9 +165,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, /* Fill in the ulog data */ pm->version = EBT_ULOG_VERSION; - do_gettimeofday(&pm->stamp); + kt = ktime_get_real(); + pm->stamp = ktime_to_timeval(kt); if (ub->qlen == 1) - skb_set_timestamp(ub->skb, &pm->stamp); + ub->skb->tstamp = kt; pm->data_len = copy_len; pm->mark = skb->mark; pm->hook = hooknr; diff --git a/net/compat.c b/net/compat.c index 1f32866d09b..17c2710b2b9 100644 --- a/net/compat.c +++ b/net/compat.c @@ -545,15 +545,20 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) struct compat_timeval __user *ctv = (struct compat_timeval __user*) userstamp; int err = -ENOENT; + struct timeval tv; if (!sock_flag(sk, SOCK_TIMESTAMP)) sock_enable_timestamp(sk); - if (sk->sk_stamp.tv_sec == -1) + tv = ktime_to_timeval(sk->sk_stamp); + if (tv.tv_sec == -1) return err; - if (sk->sk_stamp.tv_sec == 0) - do_gettimeofday(&sk->sk_stamp); - if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) || - put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec)) + if (tv.tv_sec == 0) { + sk->sk_stamp = ktime_get_real(); + tv = ktime_to_timeval(sk->sk_stamp); + } + err = 0; + if (put_user(tv.tv_sec, &ctv->tv_sec) || + put_user(tv.tv_usec, &ctv->tv_usec)) err = -EFAULT; return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 4dc93cc4d5b..582db646cc5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1031,23 +1031,12 @@ void net_disable_timestamp(void) atomic_dec(&netstamp_needed); } -void __net_timestamp(struct sk_buff *skb) -{ - struct timeval tv; - - do_gettimeofday(&tv); - skb_set_timestamp(skb, &tv); -} -EXPORT_SYMBOL(__net_timestamp); - static inline void net_timestamp(struct sk_buff *skb) { if (atomic_read(&netstamp_needed)) __net_timestamp(skb); - else { - skb->tstamp.off_sec = 0; - skb->tstamp.off_usec = 0; - } + else + skb->tstamp.tv64 = 0; } /* @@ -1577,7 +1566,7 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->tstamp.off_sec) + if (!skb->tstamp.tv64) net_timestamp(skb); /* @@ -1769,7 +1758,7 @@ int netif_receive_skb(struct sk_buff *skb) if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->tstamp.off_sec) + if (!skb->tstamp.tv64) net_timestamp(skb); if (!skb->iif) diff --git a/net/core/sock.c b/net/core/sock.c index 6d35d5775ba..6ddb3664b99 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1512,8 +1512,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; - sk->sk_stamp.tv_sec = -1L; - sk->sk_stamp.tv_usec = -1L; + sk->sk_stamp = ktime_set(-1L, -1L); atomic_set(&sk->sk_refcnt, 1); } @@ -1554,14 +1553,17 @@ EXPORT_SYMBOL(release_sock); int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { + struct timeval tv; if (!sock_flag(sk, SOCK_TIMESTAMP)) sock_enable_timestamp(sk); - if (sk->sk_stamp.tv_sec == -1) + tv = ktime_to_timeval(sk->sk_stamp); + if (tv.tv_sec == -1) return -ENOENT; - if (sk->sk_stamp.tv_sec == 0) - do_gettimeofday(&sk->sk_stamp); - return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ? - -EFAULT : 0; + if (tv.tv_sec == 0) { + sk->sk_stamp = ktime_get_real(); + tv = ktime_to_timeval(sk->sk_stamp); + } + return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; } EXPORT_SYMBOL(sock_get_timestamp); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index bc12e36263f..f573eddc603 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_toiovec(msg->msg_iov, skb->data, copied); if (err) goto out_free; - skb_get_timestamp(skb, &sk->sk_stamp); + sk->sk_stamp = skb->tstamp; if (msg->msg_name) memcpy(msg->msg_name, skb->cb, msg->msg_namelen); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b6f05538037..e10be7d7752 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -92,7 +92,7 @@ struct ipq { spinlock_t lock; atomic_t refcnt; struct timer_list timer; /* when will this queue expire? */ - struct timeval stamp; + ktime_t stamp; int iif; unsigned int rid; struct inet_peer *peer; @@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (skb->dev) qp->iif = skb->dev->ifindex; skb->dev = NULL; - skb_get_timestamp(skb, &qp->stamp); + qp->stamp = skb->tstamp; qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); if (offset == 0) @@ -674,7 +674,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->next = NULL; head->dev = dev; - skb_set_timestamp(head, &qp->stamp); + head->tstamp = qp->stamp; iph = head->nh.iph; iph->frag_off = 0; @@ -734,7 +734,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) return NULL; } -void ipfrag_init(void) +void __init ipfrag_init(void) { ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ (jiffies ^ (jiffies >> 6))); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a14798a850d..5842f1aa973 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -197,6 +197,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) struct sk_buff *skb; struct ipq_packet_msg *pmsg; struct nlmsghdr *nlh; + struct timeval tv; read_lock_bh(&queue_lock); @@ -241,8 +242,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = entry->skb->tstamp.off_usec; + tv = ktime_to_timeval(entry->skb->tstamp); + pmsg->timestamp_sec = tv.tv_sec; + pmsg->timestamp_usec = tv.tv_usec; pmsg->mark = entry->skb->mark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 9acc018766f..9718b666a38 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -187,6 +187,7 @@ static void ipt_ulog_packet(unsigned int hooknum, ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; + struct timeval tv; /* ffs == find first bit set, necessary because userspace * is already shifting groupnumber, but we need unshifted. @@ -232,13 +233,14 @@ static void ipt_ulog_packet(unsigned int hooknum, pm = NLMSG_DATA(nlh); /* We might not have a timestamp, get one */ - if (skb->tstamp.off_sec == 0) + if (skb->tstamp.tv64 == 0) __net_timestamp((struct sk_buff *)skb); /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - put_unaligned(skb->tstamp.off_sec, &pm->timestamp_sec); - put_unaligned(skb->tstamp.off_usec, &pm->timestamp_usec); + tv = ktime_to_timeval(skb->tstamp); + put_unaligned(tv.tv_sec, &pm->timestamp_sec); + put_unaligned(tv.tv_usec, &pm->timestamp_usec); put_unaligned(skb->mark, &pm->mark); pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index fb39604c3d0..a963a31e5fb 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -255,7 +255,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) ipv6_addr_copy(&ipv6h->saddr, &hao->addr); ipv6_addr_copy(&hao->addr, &tmp_addr); - if (skb->tstamp.off_sec == 0) + if (skb->tstamp.tv64 == 0) __net_timestamp(skb); return 1; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index fdb30a5916e..66a2c413525 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -195,6 +195,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) struct sk_buff *skb; struct ipq_packet_msg *pmsg; struct nlmsghdr *nlh; + struct timeval tv; read_lock_bh(&queue_lock); @@ -239,8 +240,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = entry->skb->tstamp.off_usec; + tv = ktime_to_timeval(entry->skb->tstamp); + pmsg->timestamp_sec = tv.tv_sec; + pmsg->timestamp_usec = tv.tv_usec; pmsg->mark = entry->skb->mark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 15ab1e3e8b5..c311b9a12ca 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -82,7 +82,7 @@ struct nf_ct_frag6_queue struct sk_buff *fragments; int len; int meat; - struct timeval stamp; + ktime_t stamp; unsigned int csum; __u8 last_in; /* has first/last segment arrived? */ #define COMPLETE 4 @@ -542,7 +542,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->fragments = skb; skb->dev = NULL; - skb_get_timestamp(skb, &fq->stamp); + fq->stamp = skb->tstamp; fq->meat += skb->len; atomic_add(skb->truesize, &nf_ct_frag6_mem); @@ -648,7 +648,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->next = NULL; head->dev = dev; - skb_set_timestamp(head, &fq->stamp); + head->tstamp = fq->stamp; head->nh.ipv6h->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7034c54e501..1dde449379f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -88,7 +88,7 @@ struct frag_queue int len; int meat; int iif; - struct timeval stamp; + ktime_t stamp; unsigned int csum; __u8 last_in; /* has first/last segment arrived? */ #define COMPLETE 4 @@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (skb->dev) fq->iif = skb->dev->ifindex; skb->dev = NULL; - skb_get_timestamp(skb, &fq->stamp); + fq->stamp = skb->tstamp; fq->meat += skb->len; atomic_add(skb->truesize, &ip6_frag_mem); @@ -663,7 +663,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->next = NULL; head->dev = dev; - skb_set_timestamp(head, &fq->stamp); + head->tstamp = fq->stamp; head->nh.ipv6h->payload_len = htons(payload_len); IP6CB(head)->nhoff = nhoff; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index cac35a77f06..6c6c0a3a0ab 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1807,8 +1807,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, copied); if (rc) goto out_free; - if (skb->tstamp.off_sec) - skb_get_timestamp(skb, &sk->sk_stamp); + if (skb->tstamp.tv64) + sk->sk_stamp = skb->tstamp; msg->msg_namelen = sizeof(*sipx); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 5cb30ebba0f..5eeebd2efa7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -509,11 +509,11 @@ __build_packet_message(struct nfulnl_instance *inst, NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); } - if (skb->tstamp.off_sec) { + if (skb->tstamp.tv64) { struct nfulnl_msg_packet_timestamp ts; - - ts.sec = cpu_to_be64(skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb->tstamp.off_usec); + struct timeval tv = ktime_to_timeval(skb->tstamp); + ts.sec = cpu_to_be64(tv.tv_sec); + ts.usec = cpu_to_be64(tv.tv_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index d9ce4a71d0f..cfbee39f61d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -495,11 +495,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); } - if (entskb->tstamp.off_sec) { + if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; - - ts.sec = cpu_to_be64(entskb->tstamp.off_sec); - ts.usec = cpu_to_be64(entskb->tstamp.off_usec); + struct timeval tv = ktime_to_timeval(entskb->tstamp); + ts.sec = cpu_to_be64(tv.tv_sec); + ts.usec = cpu_to_be64(tv.tv_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 28d47e8f287..f9866a8456a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -582,6 +582,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; unsigned short macoff, netoff; struct sk_buff *copy_skb = NULL; + struct timeval tv; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -656,12 +657,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe h->tp_snaplen = snaplen; h->tp_mac = macoff; h->tp_net = netoff; - if (skb->tstamp.off_sec == 0) { + if (skb->tstamp.tv64 == 0) { __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb->tstamp.off_sec; - h->tp_usec = skb->tstamp.off_usec; + tv = ktime_to_timeval(skb->tstamp); + h->tp_sec = tv.tv_sec; + h->tp_usec = tv.tv_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2772fee9388..22f61aee482 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -798,16 +798,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) dprintk("svc: recvfrom returned error %d\n", -err); } rqstp->rq_addrlen = sizeof(rqstp->rq_addr); - if (skb->tstamp.off_sec == 0) { - struct timeval tv; - - tv.tv_sec = xtime.tv_sec; - tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; - skb_set_timestamp(skb, &tv); + if (skb->tstamp.tv64 == 0) { + skb->tstamp = ktime_get_real(); /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); + svsk->sk_sk->sk_stamp = skb->tstamp; set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ /* -- cgit v1.2.3-70-g09d2 From 641b9e0e8b7f96425da6ce98f3361e3af0baee29 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 16 Mar 2007 01:18:42 -0700 Subject: [NET_SCHED]: Use ktime as clocksource Get rid of the manual clock source selection mess and use ktime. Also use a scalar representation, which allows to clean up pkt_sched.h a bit more and results in less ktime_to_ns() calls in most cases. The PSCHED_US2JIFFIE/PSCHED_JIFFIE2US macros are implemented quite inefficient by this patch, following patches will convert all qdiscs to hrtimers and get rid of them entirely. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 169 ++++-------------------------------------------- kernel/hrtimer.c | 1 + net/sched/Kconfig | 56 ---------------- net/sched/sch_api.c | 77 +--------------------- net/sched/sch_hfsc.c | 31 +-------- 5 files changed, 19 insertions(+), 315 deletions(-) (limited to 'kernel') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f6afee73235..1c12afd113d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -2,6 +2,7 @@ #define __NET_PKT_SCHED_H #include +#include #include struct qdisc_walker @@ -37,176 +38,32 @@ static inline void *qdisc_priv(struct Qdisc *q) The things are not so bad, because we may use artifical clock evaluated by integration of network data flow in the most critical places. - - Note: we do not use fastgettimeofday. - The reason is that, when it is not the same thing as - gettimeofday, it returns invalid timestamp, which is - not updated, when net_bh is active. */ -/* General note about internal clock. - - Any clock source returns time intervals, measured in units - close to 1usec. With source CONFIG_NET_SCH_CLK_GETTIMEOFDAY it is precisely - microseconds, otherwise something close but different chosen to minimize - arithmetic cost. Ratio usec/internal untis in form nominator/denominator - may be read from /proc/net/psched. - */ - - -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY - -typedef struct timeval psched_time_t; -typedef long psched_tdiff_t; - -#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) -#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs) -#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay) - -#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ - typedef u64 psched_time_t; typedef long psched_tdiff_t; -#ifdef CONFIG_NET_SCH_CLK_JIFFIES - -#if HZ < 96 -#define PSCHED_JSCALE 14 -#elif HZ >= 96 && HZ < 192 -#define PSCHED_JSCALE 13 -#elif HZ >= 192 && HZ < 384 -#define PSCHED_JSCALE 12 -#elif HZ >= 384 && HZ < 768 -#define PSCHED_JSCALE 11 -#elif HZ >= 768 -#define PSCHED_JSCALE 10 -#endif - -#define PSCHED_GET_TIME(stamp) ((stamp) = (get_jiffies_64()<>PSCHED_JSCALE) -#define PSCHED_JIFFIE2US(delay) ((delay)< - -extern psched_tdiff_t psched_clock_per_hz; -extern int psched_clock_scale; -extern psched_time_t psched_time_base; -extern cycles_t psched_time_mark; - -#define PSCHED_GET_TIME(stamp) \ -do { \ - cycles_t cur = get_cycles(); \ - if (sizeof(cycles_t) == sizeof(u32)) { \ - if (cur <= psched_time_mark) \ - psched_time_base += 0x100000000ULL; \ - psched_time_mark = cur; \ - (stamp) = (psched_time_base + cur)>>psched_clock_scale; \ - } else { \ - (stamp) = cur>>psched_clock_scale; \ - } \ -} while (0) -#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz) -#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz) - -#endif /* CONFIG_NET_SCH_CLK_CPU */ - -#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ - -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY -#define PSCHED_TDIFF(tv1, tv2) \ -({ \ - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ - int __delta = (tv1).tv_usec - (tv2).tv_usec; \ - if (__delta_sec) { \ - switch (__delta_sec) { \ - default: \ - __delta = 0; \ - case 2: \ - __delta += USEC_PER_SEC; \ - case 1: \ - __delta += USEC_PER_SEC; \ - } \ - } \ - __delta; \ -}) - -static inline int -psched_tod_diff(int delta_sec, int bound) -{ - int delta; - - if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1) - return bound; - delta = delta_sec * USEC_PER_SEC; - if (delta > bound || delta < 0) - delta = bound; - return delta; -} - -#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \ -({ \ - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ - int __delta = (tv1).tv_usec - (tv2).tv_usec; \ - switch (__delta_sec) { \ - default: \ - __delta = psched_tod_diff(__delta_sec, bound); break; \ - case 2: \ - __delta += USEC_PER_SEC; \ - case 1: \ - __delta += USEC_PER_SEC; \ - case 0: \ - if (__delta > bound || __delta < 0) \ - __delta = bound; \ - } \ - __delta; \ -}) - -#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ - (tv1).tv_sec <= (tv2).tv_sec) || \ - (tv1).tv_sec < (tv2).tv_sec) - -#define PSCHED_TADD2(tv, delta, tv_res) \ -({ \ - int __delta = (tv).tv_usec + (delta); \ - (tv_res).tv_sec = (tv).tv_sec; \ - while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \ - (tv_res).tv_usec = __delta; \ -}) - -#define PSCHED_TADD(tv, delta) \ -({ \ - (tv).tv_usec += (delta); \ - while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \ - (tv).tv_usec -= USEC_PER_SEC; } \ -}) - -/* Set/check that time is in the "past perfect"; - it depends on concrete representation of system time - */ - -#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) -#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) +/* Avoid doing 64 bit divide by 1000 */ +#define PSCHED_US2NS(x) ((s64)(x) << 10) +#define PSCHED_NS2US(x) ((x) >> 10) -#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) +#define PSCHED_TICKS_PER_SEC PSCHED_NS2US(NSEC_PER_SEC) +#define PSCHED_GET_TIME(stamp) \ + ((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get()))) -#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ +#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(PSCHED_US2NS((usecs)) / NSEC_PER_USEC) +#define PSCHED_JIFFIE2US(delay) PSCHED_NS2US(jiffies_to_usecs((delay)) * NSEC_PER_USEC) -#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) +#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \ - min_t(long long, (tv1) - (tv2), bound) - - -#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) + min_t(long long, (tv1) - (tv2), bound) +#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) #define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) -#define PSCHED_TADD(tv, delta) ((tv) += (delta)) +#define PSCHED_TADD(tv, delta) ((tv) += (delta)) #define PSCHED_SET_PASTPERFECT(t) ((t) = 0) #define PSCHED_IS_PASTPERFECT(t) ((t) == 0) #define PSCHED_AUDIT_TDIFF(t) -#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ - extern struct Qdisc_ops pfifo_qdisc_ops; extern struct Qdisc_ops bfifo_qdisc_ops; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b74860aaf5f..f5cfde8c902 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -59,6 +59,7 @@ ktime_t ktime_get(void) return timespec_to_ktime(now); } +EXPORT_SYMBOL_GPL(ktime_get); /** * ktime_get_real - get the real (wall-) time in ktime_t format diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f4544dd8647..475df8449be 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -46,62 +46,6 @@ config NET_SCH_FIFO if NET_SCHED -choice - prompt "Packet scheduler clock source" - default NET_SCH_CLK_GETTIMEOFDAY - ---help--- - Packet schedulers need a monotonic clock that increments at a static - rate. The kernel provides several suitable interfaces, each with - different properties: - - - high resolution (us or better) - - fast to read (minimal locking, no i/o access) - - synchronized on all processors - - handles cpu clock frequency changes - - but nothing provides all of the above. - -config NET_SCH_CLK_JIFFIES - bool "Timer interrupt" - ---help--- - Say Y here if you want to use the timer interrupt (jiffies) as clock - source. This clock source is fast, synchronized on all processors and - handles cpu clock frequency changes, but its resolution is too low - for accurate shaping except at very low speed. - -config NET_SCH_CLK_GETTIMEOFDAY - bool "gettimeofday" - ---help--- - Say Y here if you want to use gettimeofday as clock source. This clock - source has high resolution, is synchronized on all processors and - handles cpu clock frequency changes, but it is slow. - - Choose this if you need a high resolution clock source but can't use - the CPU's cycle counter. - -# don't allow on SMP x86 because they can have unsynchronized TSCs. -# gettimeofday is a good alternative -config NET_SCH_CLK_CPU - bool "CPU cycle counter" - depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 - ---help--- - Say Y here if you want to use the CPU's cycle counter as clock source. - This is a cheap and high resolution clock source, but on some - architectures it is not synchronized on all processors and doesn't - handle cpu clock frequency changes. - - The useable cycle counters are: - - x86/x86_64 - Timestamp Counter - alpha - Cycle Counter - sparc64 - %ticks register - ppc64 - Time base - ia64 - Interval Time Counter - - Choose this if your CPU's cycle counter is working properly. - -endchoice - comment "Queueing/Scheduling" config NET_SCH_CBQ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4a927a5e1fa..d71bf79eb80 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1175,15 +1175,12 @@ reclassify: return -1; } -static int psched_us_per_tick = 1; -static int psched_tick_per_us = 1; - #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { seq_printf(seq, "%08x %08x %08x %08x\n", - psched_tick_per_us, psched_us_per_tick, - 1000000, HZ); + (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1), + 1000000, HZ); return 0; } @@ -1202,80 +1199,10 @@ static const struct file_operations psched_fops = { }; #endif -#ifdef CONFIG_NET_SCH_CLK_CPU -psched_tdiff_t psched_clock_per_hz; -int psched_clock_scale; -EXPORT_SYMBOL(psched_clock_per_hz); -EXPORT_SYMBOL(psched_clock_scale); - -psched_time_t psched_time_base; -cycles_t psched_time_mark; -EXPORT_SYMBOL(psched_time_mark); -EXPORT_SYMBOL(psched_time_base); - -/* - * Periodically adjust psched_time_base to avoid overflow - * with 32-bit get_cycles(). Safe up to 4GHz CPU. - */ -static void psched_tick(unsigned long); -static DEFINE_TIMER(psched_timer, psched_tick, 0, 0); - -static void psched_tick(unsigned long dummy) -{ - if (sizeof(cycles_t) == sizeof(u32)) { - psched_time_t dummy_stamp; - PSCHED_GET_TIME(dummy_stamp); - psched_timer.expires = jiffies + 1*HZ; - add_timer(&psched_timer); - } -} - -int __init psched_calibrate_clock(void) -{ - psched_time_t stamp, stamp1; - struct timeval tv, tv1; - psched_tdiff_t delay; - long rdelay; - unsigned long stop; - - psched_tick(0); - stop = jiffies + HZ/10; - PSCHED_GET_TIME(stamp); - do_gettimeofday(&tv); - while (time_before(jiffies, stop)) { - barrier(); - cpu_relax(); - } - PSCHED_GET_TIME(stamp1); - do_gettimeofday(&tv1); - - delay = PSCHED_TDIFF(stamp1, stamp); - rdelay = tv1.tv_usec - tv.tv_usec; - rdelay += (tv1.tv_sec - tv.tv_sec)*1000000; - if (rdelay > delay) - return -1; - delay /= rdelay; - psched_tick_per_us = delay; - while ((delay>>=1) != 0) - psched_clock_scale++; - psched_us_per_tick = 1<>psched_clock_scale; - return 0; -} -#endif - static int __init pktsched_init(void) { struct rtnetlink_link *link_p; -#ifdef CONFIG_NET_SCH_CLK_CPU - if (psched_calibrate_clock() < 0) - return -1; -#elif defined(CONFIG_NET_SCH_CLK_JIFFIES) - psched_tick_per_us = HZ< -#undef PSCHED_GET_TIME -#define PSCHED_GET_TIME(stamp) \ -do { \ - struct timeval tv; \ - do_gettimeofday(&tv); \ - (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \ -} while (0) -#endif - #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ @@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl) * ism: (psched_us/byte) << ISM_SHIFT * dx: psched_us * - * Clock source resolution (CONFIG_NET_SCH_CLK_*) - * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us. - * CPU: resolution is between 0.5us and 1us. - * GETTIMEOFDAY: resolution is exactly 1us. + * The clock source resolution with ktime is 1.024us. * * sm and ism are scaled in order to keep effective digits. * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective * digits in decimal using the following table. * - * Note: We can afford the additional accuracy (altq hfsc keeps at most - * 3 effective digits) thanks to the fact that linux clock is bounded - * much more tightly. - * * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps * ------------+------------------------------------------------------- - * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3 - * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3 - * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3 + * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3 * - * 0.5us/byte 160 16 1.6 0.16 0.016 - * us/byte 80 8 0.8 0.08 0.008 - * 1.27us/byte 63 6.3 0.63 0.063 0.0063 + * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125 */ #define SM_SHIFT 20 #define ISM_SHIFT 18 -- cgit v1.2.3-70-g09d2 From 27a884dc3cb63b93c2b3b643f5b31eed5f8a4d26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Apr 2007 20:29:13 -0700 Subject: [SK_BUFF]: Convert skb->tail to sk_buff_data_t So that it is also an offset from skb->head, reduces its size from 8 to 4 bytes on 64bit architectures, allowing us to combine the 4 bytes hole left by the layer headers conversion, reducing struct sk_buff size to 256 bytes, i.e. 4 64byte cachelines, and since the sk_buff slab cache is SLAB_HWCACHE_ALIGN... :-) Many calculations that previously required that skb->{transport,network, mac}_header be first converted to a pointer now can be done directly, being meaningful as offsets or pointers. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- arch/ia64/sn/kernel/xpnet.c | 10 ++--- drivers/atm/he.c | 4 +- drivers/atm/idt77252.c | 3 +- drivers/atm/nicstar.c | 10 +++-- drivers/infiniband/hw/amso1100/c2.c | 5 ++- drivers/isdn/i4l/isdn_net.c | 2 +- drivers/media/dvb/dvb-core/dvb_net.c | 10 +++-- drivers/net/cris/eth_v10.c | 2 +- drivers/net/cxgb3/sge.c | 6 +-- drivers/net/e1000/e1000_main.c | 4 +- drivers/net/ibm_emac/ibm_emac_core.c | 2 +- drivers/net/macb.c | 3 +- drivers/net/pcmcia/nmclan_cs.c | 2 +- drivers/net/s2io.c | 4 +- drivers/net/tulip/uli526x.c | 14 +++++-- drivers/net/wan/hdlc_fr.c | 2 +- drivers/net/wan/lmc/lmc_main.c | 4 +- drivers/net/wireless/hostap/hostap_80211_rx.c | 2 +- drivers/s390/net/ctcmain.c | 11 ++++-- drivers/s390/net/netiucv.c | 10 +++-- drivers/usb/atm/usbatm.c | 10 ++--- drivers/usb/net/asix.c | 6 +-- drivers/usb/net/gl620a.c | 2 +- drivers/usb/net/net1080.c | 2 +- drivers/usb/net/rndis_host.c | 2 +- include/linux/netfilter/nfnetlink.h | 4 +- include/linux/netlink.h | 2 +- include/linux/rtnetlink.h | 6 +-- include/linux/skbuff.h | 57 +++++++++++++++++++++------ include/net/inet_ecn.h | 6 +-- include/net/netlink.h | 8 ++-- include/net/pkt_cls.h | 2 +- kernel/audit.c | 8 ++-- net/atm/lec.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/core/dev.c | 4 +- net/core/filter.c | 2 +- net/core/gen_stats.c | 4 +- net/core/pktgen.c | 4 +- net/core/skbuff.c | 35 +++++++++------- net/core/wireless.c | 4 +- net/decnet/dn_nsp_out.c | 6 ++- net/decnet/dn_route.c | 4 +- net/decnet/dn_table.c | 8 ++-- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/econet/af_econet.c | 2 +- net/ieee80211/ieee80211_rx.c | 2 +- net/ipv4/esp4.c | 8 ++-- net/ipv4/icmp.c | 3 +- net/ipv4/igmp.c | 4 +- net/ipv4/inet_diag.c | 12 +++--- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipmr.c | 9 +++-- net/ipv4/ipvs/ip_vs_ftp.c | 4 +- net/ipv4/netfilter/arpt_mangle.c | 8 ++-- net/ipv4/netfilter/ip_queue.c | 4 +- net/ipv4/netfilter/nf_nat_helper.c | 3 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv6/datagram.c | 2 +- net/ipv6/esp6.c | 8 ++-- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 3 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/mcast.c | 6 +-- net/ipv6/mip6.c | 4 +- net/ipv6/ndisc.c | 19 ++++----- net/ipv6/netfilter/ip6_queue.c | 4 +- net/ipv6/raw.c | 2 +- net/irda/ircomm/ircomm_param.c | 4 +- net/irda/irlan/irlan_common.c | 2 +- net/irda/qos.c | 14 +++---- net/netfilter/nf_conntrack_netlink.c | 16 +++----- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 4 +- net/netlink/af_netlink.c | 2 +- net/packet/af_packet.c | 2 +- net/sched/act_api.c | 52 ++++++++++++------------ net/sched/act_gact.c | 2 +- net/sched/act_ipt.c | 2 +- net/sched/act_mirred.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 8 ++-- net/sched/act_simple.c | 2 +- net/sched/cls_api.c | 14 +++---- net/sched/cls_basic.c | 4 +- net/sched/cls_fw.c | 4 +- net/sched/cls_route.c | 4 +- net/sched/cls_rsvp.h | 4 +- net/sched/cls_tcindex.c | 6 +-- net/sched/cls_u32.c | 6 +-- net/sched/ematch.c | 17 ++++---- net/sched/sch_api.c | 8 ++-- net/sched/sch_atm.c | 4 +- net/sched/sch_cbq.c | 20 +++++----- net/sched/sch_hfsc.c | 6 +-- net/sched/sch_htb.c | 10 ++--- net/sched/sch_ingress.c | 4 +- net/sched/sch_netem.c | 4 +- net/sched/sch_prio.c | 2 +- net/sched/sch_sfq.c | 2 +- net/sched/sch_tbf.c | 4 +- net/sctp/input.c | 4 +- net/sctp/inqueue.c | 8 ++-- net/sctp/sm_make_chunk.c | 4 +- net/sctp/sm_statefuns.c | 4 +- net/tipc/config.c | 2 +- net/tipc/socket.c | 2 +- net/xfrm/xfrm_user.c | 40 +++++++++---------- security/selinux/netlink.c | 2 +- 110 files changed, 396 insertions(+), 329 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c index 68d59d912c9..eb416c95967 100644 --- a/arch/ia64/sn/kernel/xpnet.c +++ b/arch/ia64/sn/kernel/xpnet.c @@ -264,7 +264,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg) dev_dbg(xpnet, "head=0x%p skb->data=0x%p skb->tail=0x%p " "skb->end=0x%p skb->len=%d\n", (void *) skb->head, - (void *) skb->data, (void *) skb->tail, (void *) skb->end, + (void *)skb->data, skb_tail_pointer(skb), (void *)skb->end, skb->len); skb->protocol = eth_type_trans(skb, xpnet_device); @@ -272,7 +272,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg) dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p " "skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n", - (void *) skb->head, (void *) skb->data, (void *) skb->tail, + (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), (void *) skb->end, skb->len); @@ -475,7 +475,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " "skb->end=0x%p skb->len=%d\n", (void *) skb->head, - (void *) skb->data, (void *) skb->tail, (void *) skb->end, + (void *)skb->data, skb_tail_pointer(skb), (void *)skb->end, skb->len); @@ -497,7 +497,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) /* get the beginning of the first cacheline and end of last */ start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1)); - end_addr = L1_CACHE_ALIGN((u64) skb->tail); + end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); /* calculate how many bytes to embed in the XPC message */ embedded_bytes = 0; @@ -573,7 +573,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) msg->magic = XPNET_MAGIC; msg->size = end_addr - start_addr; msg->leadin_ignore = (u64) skb->data - start_addr; - msg->tailout_ignore = end_addr - (u64) skb->tail; + msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); msg->buf_pa = __pa(start_addr); dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa=" diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 8510026b690..d33aba6864c 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1901,13 +1901,13 @@ he_service_rbrq(struct he_dev *he_dev, int group) case ATM_AAL0: /* 2.10.1.5 raw cell receive */ skb->len = ATM_AAL0_SDU; - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); break; case ATM_AAL5: /* 2.10.1.2 aal5 receive */ skb->len = AAL5_LEN(skb->data, he_vcc->pdu_len); - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); #ifdef USE_CHECKSUM_HW if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) { skb->ip_summed = CHECKSUM_COMPLETE; diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index b4b80140c39..1e49799cd6c 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1816,7 +1816,8 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue) u32 handle; u32 addr; - skb->data = skb->tail = skb->head; + skb->data = skb->head; + skb_reset_tail_pointer(skb); skb->len = 0; skb_reserve(skb, 16); diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index aab9b3733d5..26f4b703349 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2208,7 +2208,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (i == 1 && ns_rsqe_eopdu(rsqe)) *((u32 *) sb->data) |= 0x00000002; skb_put(sb, NS_AAL0_HEADER); - memcpy(sb->tail, cell, ATM_CELL_PAYLOAD); + memcpy(skb_tail_pointer(sb), cell, ATM_CELL_PAYLOAD); skb_put(sb, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; __net_timestamp(sb); @@ -2252,7 +2252,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) vc->rx_iov = iovb; NS_SKB(iovb)->iovcnt = 0; iovb->len = 0; - iovb->tail = iovb->data = iovb->head; + iovb->data = iovb->head; + skb_reset_tail_pointer(iovb); NS_SKB(iovb)->vcc = vcc; /* IMPORTANT: a pointer to the sk_buff containing the small or large buffer is stored as iovec base, NOT a pointer to the @@ -2265,7 +2266,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_MAX_IOVECS); NS_SKB(iovb)->iovcnt = 0; iovb->len = 0; - iovb->tail = iovb->data = iovb->head; + iovb->data = iovb->head; + skb_reset_tail_pointer(iovb); NS_SKB(iovb)->vcc = vcc; } iov = &((struct iovec *) iovb->data)[NS_SKB(iovb)->iovcnt++]; @@ -2489,7 +2491,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { lb = (struct sk_buff *) iov->iov_base; tocopy = min_t(int, remaining, iov->iov_len); - memcpy(hb->tail, lb->data, tocopy); + memcpy(skb_tail_pointer(hb), lb->data, tocopy); skb_put(hb, tocopy); iov++; remaining -= tocopy; diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 7698feafa6a..58bc272bd40 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) } /* Setup the skb for reuse since we're dropping this pkt */ - elem->skb->tail = elem->skb->data = elem->skb->head; + elem->skb->data = elem->skb->head; + skb_reset_tail_pointer(elem->skb); /* Zero out the rxp hdr in the sk_buff */ memset(elem->skb->data, 0, sizeof(*rxp_hdr)); @@ -521,7 +522,7 @@ static void c2_rx_interrupt(struct net_device *netdev) * "sizeof(struct c2_rxp_hdr)". */ skb->data += sizeof(*rxp_hdr); - skb->tail = skb->data + buflen; + skb_set_tail_pointer(skb, buflen); skb->len = buflen; skb->protocol = eth_type_trans(skb, netdev); diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index cd3b1fa4a41..aa83277aba7 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -881,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp) addinfo[0] = '\0'; /* This check stolen from 2.1.72 dev_queue_xmit_nit() */ - if (p < skb->data || p >= skb->tail) { + if (p < skb->data || skb->network_header >= skb->tail) { /* fall back to old isdn_net_log_packet method() */ char * buf = skb->data; diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c index c6b004182d9..9de177a5b9f 100644 --- a/drivers/media/dvb/dvb-core/dvb_net.c +++ b/drivers/media/dvb/dvb-core/dvb_net.c @@ -600,6 +600,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) /* Check CRC32, we've got it in our skb already. */ unsigned short ulen = htons(priv->ule_sndu_len); unsigned short utype = htons(priv->ule_sndu_type); + const u8 *tail; struct kvec iov[3] = { { &ulen, sizeof ulen }, { &utype, sizeof utype }, @@ -613,10 +614,11 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) } ule_crc = iov_crc32(ule_crc, iov, 3); - expected_crc = *((u8 *)priv->ule_skb->tail - 4) << 24 | - *((u8 *)priv->ule_skb->tail - 3) << 16 | - *((u8 *)priv->ule_skb->tail - 2) << 8 | - *((u8 *)priv->ule_skb->tail - 1); + tail = skb_tail_pointer(priv->ule_skb); + expected_crc = *(tail - 4) << 24 | + *(tail - 3) << 16 | + *(tail - 2) << 8 | + *(tail - 1); if (ule_crc != expected_crc) { printk(KERN_WARNING "%lu: CRC32 check FAILED: %08x / %08x, SNDU len %d type %#x, ts_remain %d, next 2: %x.\n", priv->ts_count, ule_crc, expected_crc, priv->ule_sndu_len, priv->ule_sndu_type, ts_remain, ts_remain > 2 ? *(unsigned short *)from_where : 0); diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index 98643801a3b..7feb9c56114 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -1348,7 +1348,7 @@ e100_rx(struct net_device *dev) #ifdef ETHDEBUG printk("head = 0x%x, data = 0x%x, tail = 0x%x, end = 0x%x\n", - skb->head, skb->data, skb->tail, skb->end); + skb->head, skb->data, skb_tail_pointer(skb), skb->end); printk("copying packet to 0x%x.\n", skb_data_ptr); #endif diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 610e4769efa..c5faf1380e1 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1325,13 +1325,13 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, flits = skb_transport_offset(skb) / 8; sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), - skb->tail - skb_transport_header(skb), + skb->tail - skb->transport_header, adap->pdev); if (need_skb_unmap()) { setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); skb->destructor = deferred_unmap_destructor; ((struct unmap_info *)skb->cb)->len = (skb->tail - - skb_transport_header(skb)); + skb->transport_header); } write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, @@ -1353,7 +1353,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb) return 1; /* packet fits as immediate data */ flits = skb_transport_offset(skb) / 8; /* headers */ - if (skb->tail != skb_transport_header(skb)) + if (skb->tail != skb->transport_header) cnt++; return flits_to_desc(flits + sgl_len(cnt)); } diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index e86deb2ef82..e7c93f44f81 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3304,7 +3304,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * NOTE: this is a TSO only workaround * if end byte alignment not correct move us * into the next dword */ - if ((unsigned long)(skb->tail - 1) & 4) + if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4) break; /* fall through */ case e1000_82571: @@ -4388,7 +4388,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, PCI_DMA_FROMDEVICE); vaddr = kmap_atomic(ps_page->ps_page[0], KM_SKB_DATA_SOFTIRQ); - memcpy(skb->tail, vaddr, l1); + memcpy(skb_tail_pointer(skb), vaddr, l1); kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); pci_dma_sync_single_for_device(pdev, ps_page_dma->ps_page_dma[0], diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c index b1ad62d89eb..3d82d46f499 100644 --- a/drivers/net/ibm_emac/ibm_emac_core.c +++ b/drivers/net/ibm_emac/ibm_emac_core.c @@ -1338,7 +1338,7 @@ static inline int emac_rx_sg_append(struct ocp_enet_private *dev, int slot) dev_kfree_skb(dev->rx_sg_skb); dev->rx_sg_skb = NULL; } else { - cacheable_memcpy(dev->rx_sg_skb->tail, + cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb), dev->rx_skb[slot]->data, len); skb_put(dev->rx_sg_skb, len); emac_recycle_rx_skb(dev, slot, len); diff --git a/drivers/net/macb.c b/drivers/net/macb.c index 0c3649be0d0..98bf51afcee 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -575,7 +575,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) int i; dev_dbg(&bp->pdev->dev, "start_xmit: len %u head %p data %p tail %p end %p\n", - skb->len, skb->head, skb->data, skb->tail, skb->end); + skb->len, skb->head, skb->data, + skb_tail_pointer(skb), skb->end); dev_dbg(&bp->pdev->dev, "data:"); for (i = 0; i < 16; i++) diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index ec0af65cd5d..73da611fd53 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -1185,7 +1185,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) skb_reserve(skb, 2); insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1); if (pkt_len & 1) - *(skb->tail-1) = inb(ioaddr + AM2150_RCV); + *(skb_tail_pointer(skb) - 1) = inb(ioaddr + AM2150_RCV); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); /* Send the packet to the upper (protocol) layers. */ diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 46ebf141ee5..600d3ff347f 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2195,7 +2195,7 @@ static int fill_rxd_3buf(struct s2io_nic *nic, struct RxD_t *rxdp, struct \ frag_list->next = NULL; tmp = (void *)ALIGN((long)frag_list->data, ALIGN_SIZE + 1); frag_list->data = tmp; - frag_list->tail = tmp; + skb_reset_tail_pointer(frag_list); /* Buffer-2 receives L4 data payload */ ((struct RxD3*)rxdp)->Buffer2_ptr = pci_map_single(nic->pdev, @@ -2349,7 +2349,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) tmp += ALIGN_SIZE; tmp &= ~ALIGN_SIZE; skb->data = (void *) (unsigned long)tmp; - skb->tail = (void *) (unsigned long)tmp; + skb_reset_tail_pointer(skb); if (!(((struct RxD3*)rxdp)->Buffer0_ptr)) ((struct RxD3*)rxdp)->Buffer0_ptr = diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c index 9a5850fa644..e46f4cb02c1 100644 --- a/drivers/net/tulip/uli526x.c +++ b/drivers/net/tulip/uli526x.c @@ -829,7 +829,9 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info != NULL) ) { /* size less than COPY_SIZE, allocate a rxlen SKB */ skb_reserve(skb, 2); /* 16byte align */ - memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen); + memcpy(skb_put(skb, rxlen), + skb_tail_pointer(rxptr->rx_skb_ptr), + rxlen); uli526x_reuse_skb(db, rxptr->rx_skb_ptr); } else skb_put(skb, rxlen); @@ -1175,7 +1177,10 @@ static void uli526x_reuse_skb(struct uli526x_board_info *db, struct sk_buff * sk if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) { rxptr->rx_skb_ptr = skb; - rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev, + skb_tail_pointer(skb), + RX_ALLOC_SIZE, + PCI_DMA_FROMDEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); db->rx_avail_cnt++; @@ -1339,7 +1344,10 @@ static void allocate_rx_buffer(struct uli526x_board_info *db) if ( ( skb = dev_alloc_skb(RX_ALLOC_SIZE) ) == NULL ) break; rxptr->rx_skb_ptr = skb; /* FIXME (?) */ - rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev, + skb_tail_pointer(skb), + RX_ALLOC_SIZE, + PCI_DMA_FROMDEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); rxptr = rxptr->next_rx_desc; diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index b747228c719..aeb2789adf2 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -533,7 +533,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) skb->protocol = __constant_htons(NLPID_CCITT_ANSI_LMI); fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI); } - data = skb->tail; + data = skb_tail_pointer(skb); data[i++] = LMI_CALLREF; data[i++] = dce ? LMI_STATUS : LMI_STATUS_ENQUIRY; if (lmi == LMI_ANSI) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index d4851465c83..b731f3aae0d 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1636,7 +1636,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/ if (nsb) { sc->lmc_rxq[i] = nsb; nsb->dev = dev; - sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail); + sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb)); } sc->failed_recv_alloc = 1; goto skip_packet; @@ -1679,7 +1679,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/ if (nsb) { sc->lmc_rxq[i] = nsb; nsb->dev = dev; - sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail); + sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb)); /* Transferred to 21140 below */ } else { diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c index 35a3a50724f..5e3e9e26270 100644 --- a/drivers/net/wireless/hostap/hostap_80211_rx.c +++ b/drivers/net/wireless/hostap/hostap_80211_rx.c @@ -922,7 +922,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb, if (frag != 0) flen -= hdrlen; - if (frag_skb->tail + flen > frag_skb->end) { + if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) { printk(KERN_WARNING "%s: host decrypted and " "reassembled frame did not fit skb\n", dev->name); diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c index 787c0131704..54e3f806cd5 100644 --- a/drivers/s390/net/ctcmain.c +++ b/drivers/s390/net/ctcmain.c @@ -706,7 +706,8 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg) spin_unlock(&ch->collect_lock); return; } - ch->trans_skb->tail = ch->trans_skb->data = ch->trans_skb_data; + ch->trans_skb->data = ch->trans_skb_data; + skb_reset_tail_pointer(ch->trans_skb); ch->trans_skb->len = 0; if (ch->prof.maxmulti < (ch->collect_len + 2)) ch->prof.maxmulti = ch->collect_len + 2; @@ -831,7 +832,8 @@ ch_action_rx(fsm_instance * fi, int event, void *arg) ctc_unpack_skb(ch, skb); } again: - skb->data = skb->tail = ch->trans_skb_data; + skb->data = ch->trans_skb_data; + skb_reset_tail_pointer(skb); skb->len = 0; if (ctc_checkalloc_buffer(ch, 1)) return; @@ -2226,7 +2228,8 @@ transmit_skb(struct channel *ch, struct sk_buff *skb) * IDAL support in CTC is broken, so we have to * care about skb's above 2G ourselves. */ - hi = ((unsigned long) skb->tail + LL_HEADER_LENGTH) >> 31; + hi = ((unsigned long)skb_tail_pointer(skb) + + LL_HEADER_LENGTH) >> 31; if (hi) { nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); if (!nskb) { @@ -2262,7 +2265,7 @@ transmit_skb(struct channel *ch, struct sk_buff *skb) return -EBUSY; } - ch->trans_skb->tail = ch->trans_skb->data; + skb_reset_tail_pointer(ch->trans_skb); ch->trans_skb->len = 0; ch->ccw[1].count = skb->len; memcpy(skb_put(ch->trans_skb, skb->len), skb->data, diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 82edf201440..cd42bd54988 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -689,7 +689,8 @@ static void conn_action_rx(fsm_instance *fi, int event, void *arg) msg->length, conn->max_buffsize); return; } - conn->rx_buff->data = conn->rx_buff->tail = conn->rx_buff->head; + conn->rx_buff->data = conn->rx_buff->head; + skb_reset_tail_pointer(conn->rx_buff); conn->rx_buff->len = 0; rc = iucv_message_receive(conn->path, msg, 0, conn->rx_buff->data, msg->length, NULL); @@ -735,7 +736,8 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg) } } } - conn->tx_buff->data = conn->tx_buff->tail = conn->tx_buff->head; + conn->tx_buff->data = conn->tx_buff->head; + skb_reset_tail_pointer(conn->tx_buff); conn->tx_buff->len = 0; spin_lock_irqsave(&conn->collect_lock, saveflags); while ((skb = skb_dequeue(&conn->collect_queue))) { @@ -1164,8 +1166,8 @@ static int netiucv_transmit_skb(struct iucv_connection *conn, * Copy the skb to a new allocated skb in lowmem only if the * data is located above 2G in memory or tailroom is < 2. */ - unsigned long hi = - ((unsigned long)(skb->tail + NETIUCV_HDRLEN)) >> 31; + unsigned long hi = ((unsigned long)(skb_tail_pointer(skb) + + NETIUCV_HDRLEN)) >> 31; int copied = 0; if (hi || (skb_tailroom(skb) < 2)) { nskb = alloc_skb(skb->len + NETIUCV_HDRLEN + diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index ec63b0ee074..4d8f282b23d 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -335,15 +335,15 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char sarb = instance->cached_vcc->sarb; - if (sarb->tail + ATM_CELL_PAYLOAD > sarb->end) { + if (skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD > sarb->end) { atm_rldbg(instance, "%s: buffer overrun (sarb->len %u, vcc: 0x%p)!\n", __func__, sarb->len, vcc); /* discard cells already received */ skb_trim(sarb, 0); - UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end); + UDSL_ASSERT(skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD <= sarb->end); } - memcpy(sarb->tail, source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD); + memcpy(skb_tail_pointer(sarb), source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD); __skb_put(sarb, ATM_CELL_PAYLOAD); if (pti & 1) { @@ -370,7 +370,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char goto out; } - if (crc32_be(~0, sarb->tail - pdu_length, pdu_length) != 0xc704dd7b) { + if (crc32_be(~0, skb_tail_pointer(sarb) - pdu_length, pdu_length) != 0xc704dd7b) { atm_rldbg(instance, "%s: packet failed crc check (vcc: 0x%p)!\n", __func__, vcc); atomic_inc(&vcc->stats->rx_err); @@ -396,7 +396,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char goto out; /* atm_charge increments rx_drop */ } - memcpy(skb->data, sarb->tail - pdu_length, length); + memcpy(skb->data, skb_tail_pointer(sarb) - pdu_length, length); __skb_put(skb, length); vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u", diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c index 5808ea08245..f56e2dab371 100644 --- a/drivers/usb/net/asix.c +++ b/drivers/usb/net/asix.c @@ -298,7 +298,7 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (ax_skb) { ax_skb->len = size; ax_skb->data = packet; - ax_skb->tail = packet + size; + skb_set_tail_pointer(ax_skb, size); usbnet_skb_return(dev, ax_skb); } else { return 0; @@ -338,7 +338,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, && ((headroom + tailroom) >= (4 + padlen))) { if ((headroom < 4) || (tailroom < padlen)) { skb->data = memmove(skb->head + 4, skb->data, skb->len); - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; @@ -356,7 +356,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, if ((skb->len % 512) == 0) { cpu_to_le32s(&padbytes); - memcpy( skb->tail, &padbytes, sizeof(padbytes)); + memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes)); skb_put(skb, sizeof(padbytes)); } return skb; diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c index d257a8e026d..031cf5ca4db 100644 --- a/drivers/usb/net/gl620a.c +++ b/drivers/usb/net/gl620a.c @@ -157,7 +157,7 @@ genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) if ((headroom < (4 + 4*1)) || (tailroom < padlen)) { skb->data = memmove(skb->head + (4 + 4*1), skb->data, skb->len); - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c index ccebfdef475..19bf8dae70c 100644 --- a/drivers/usb/net/net1080.c +++ b/drivers/usb/net/net1080.c @@ -520,7 +520,7 @@ net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) skb->data = memmove(skb->head + sizeof (struct nc_header), skb->data, skb->len); - skb->tail = skb->data + len; + skb_set_tail_pointer(skb, len); goto encapsulate; } } diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c index 39a21c74fdf..1d36772ba6e 100644 --- a/drivers/usb/net/rndis_host.c +++ b/drivers/usb/net/rndis_host.c @@ -588,7 +588,7 @@ rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) if (likely((sizeof *hdr) <= room)) { skb->data = memmove(skb->head + sizeof *hdr, skb->data, len); - skb->tail = skb->data + len; + skb_set_tail_pointer(skb, len); goto fill; } } diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 6179648a014..e1ea5dfbbbd 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -62,11 +62,11 @@ struct nfattr #define NFA_DATA(nfa) ((void *)(((char *)(nfa)) + NFA_LENGTH(0))) #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) #define NFA_NEST(skb, type) \ -({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ +({ struct nfattr *__start = (struct nfattr *)skb_tail_pointer(skb); \ NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \ __start; }) #define NFA_NEST_END(skb, start) \ -({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ +({ (start)->nfa_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ (skb)->len; }) #define NFA_NEST_CANCEL(skb, start) \ ({ if (start) \ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a9d3ad5bc80..68a632b372e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -229,7 +229,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) (cb)->nlh->nlmsg_seq, type, len, flags) #define NLMSG_END(skb, nlh) \ -({ (nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \ +({ (nlh)->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)(nlh); \ (skb)->len; }) #define NLMSG_CANCEL(skb, nlh) \ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4a629ea70cc..3a4cb242ecd 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -605,7 +605,7 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi #define RTA_PUT_NOHDR(skb, attrlen, data) \ ({ RTA_APPEND(skb, RTA_ALIGN(attrlen), data); \ - memset(skb->tail - (RTA_ALIGN(attrlen) - attrlen), 0, \ + memset(skb_tail_pointer(skb) - (RTA_ALIGN(attrlen) - attrlen), 0, \ RTA_ALIGN(attrlen) - attrlen); }) #define RTA_PUT_U8(skb, attrtype, value) \ @@ -637,12 +637,12 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi RTA_PUT(skb, attrtype, 0, NULL); #define RTA_NEST(skb, type) \ -({ struct rtattr *__start = (struct rtattr *) (skb)->tail; \ +({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \ RTA_PUT(skb, type, 0, NULL); \ __start; }) #define RTA_NEST_END(skb, start) \ -({ (start)->rta_len = ((skb)->tail - (unsigned char *) (start)); \ +({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ (skb)->len; }) #define RTA_NEST_CANCEL(skb, start) \ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e740550062..e1c2392ecb5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -246,9 +246,6 @@ struct sk_buff { int iif; /* 4 byte hole on 64 bit*/ - sk_buff_data_t transport_header; - sk_buff_data_t network_header; - sk_buff_data_t mac_header; struct dst_entry *dst; struct sec_path *sp; @@ -303,13 +300,16 @@ struct sk_buff { __u32 mark; + sk_buff_data_t transport_header; + sk_buff_data_t network_header; + sk_buff_data_t mac_header; /* These elements must be at the end, see alloc_skb() for details. */ - unsigned int truesize; - atomic_t users; + sk_buff_data_t tail; unsigned char *head, *data, - *tail, *end; + unsigned int truesize; + atomic_t users; }; #ifdef __KERNEL__ @@ -812,12 +812,45 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) +#ifdef NET_SKBUFF_DATA_USES_OFFSET +static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) +{ + return skb->head + skb->tail; +} + +static inline void skb_reset_tail_pointer(struct sk_buff *skb) +{ + skb->tail = skb->data - skb->head; +} + +static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) +{ + skb_reset_tail_pointer(skb); + skb->tail += offset; +} +#else /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) +{ + return skb->tail; +} + +static inline void skb_reset_tail_pointer(struct sk_buff *skb) +{ + skb->tail = skb->data; +} + +static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) +{ + skb->tail = skb->data + offset; +} +#endif /* NET_SKBUFF_DATA_USES_OFFSET */ + /* * Add data to an sk_buff */ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) { - unsigned char *tmp = skb->tail; + unsigned char *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; @@ -835,11 +868,11 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) */ static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) { - unsigned char *tmp = skb->tail; + unsigned char *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; - if (unlikely(skb->tail>skb->end)) + if (unlikely(skb_tail_pointer(skb) > skb->end)) skb_over_panic(skb, len, current_text_addr()); return tmp; } @@ -935,7 +968,7 @@ static inline int skb_headroom(const struct sk_buff *skb) */ static inline int skb_tailroom(const struct sk_buff *skb) { - return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; + return skb_is_nonlinear(skb) ? 0 : skb->end - skb_tail_pointer(skb); } /** @@ -1127,8 +1160,8 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) WARN_ON(1); return; } - skb->len = len; - skb->tail = skb->data + len; + skb->len = len; + skb_set_tail_pointer(skb, len); } /** diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 06a2c69a89e..de8399a7977 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -114,14 +114,12 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) { switch (skb->protocol) { case __constant_htons(ETH_P_IP): - if (skb_network_header(skb) + sizeof(struct iphdr) <= - skb->tail) + if (skb->network_header + sizeof(struct iphdr) <= skb->tail) return IP_ECN_set_ce(ip_hdr(skb)); break; case __constant_htons(ETH_P_IPV6): - if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= - skb->tail) + if (skb->network_header + sizeof(struct ipv6hdr) <= skb->tail) return IP6_ECN_set_ce(ipv6_hdr(skb)); break; } diff --git a/include/net/netlink.h b/include/net/netlink.h index bcaf67b7a19..2c7ab107f20 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -525,7 +525,7 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) */ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) { - nlh->nlmsg_len = skb->tail - (unsigned char *) nlh; + nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; return skb->len; } @@ -538,7 +538,7 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) */ static inline void *nlmsg_get_pos(struct sk_buff *skb) { - return skb->tail; + return skb_tail_pointer(skb); } /** @@ -940,7 +940,7 @@ static inline unsigned long nla_get_msecs(struct nlattr *nla) */ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) { - struct nlattr *start = (struct nlattr *) skb->tail; + struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); if (nla_put(skb, attrtype, 0, NULL) < 0) return NULL; @@ -960,7 +960,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) */ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) { - start->nla_len = skb->tail - (unsigned char *) start; + start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; return skb->len; } diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dcb3a91f136..4129df70807 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -337,7 +337,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer) static inline int tcf_valid_offset(const struct sk_buff *skb, const unsigned char *ptr, const int len) { - return unlikely((ptr + len) < skb->tail && ptr > skb->head); + return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head); } #ifdef CONFIG_NET_CLS_IND diff --git a/kernel/audit.c b/kernel/audit.c index 76c9a11b72d..ea8521417d1 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1073,7 +1073,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, goto out; } va_copy(args2, args); - len = vsnprintf(skb->tail, avail, fmt, args); + len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args); if (len >= avail) { /* The printk buffer is 1024 bytes long, so if we get * here and AUDIT_BUFSIZ is at least 1024, then we can @@ -1082,7 +1082,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); if (!avail) goto out; - len = vsnprintf(skb->tail, avail, fmt, args2); + len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2); } if (len > 0) skb_put(skb, len); @@ -1143,7 +1143,7 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, return; } - ptr = skb->tail; + ptr = skb_tail_pointer(skb); for (i=0; i>4]; /* Upper nibble */ *ptr++ = hex[buf[i] & 0x0F]; /* Lower nibble */ @@ -1175,7 +1175,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, if (!avail) return; } - ptr = skb->tail; + ptr = skb_tail_pointer(skb); *ptr++ = '"'; memcpy(ptr, string, slen); ptr += slen; diff --git a/net/atm/lec.c b/net/atm/lec.c index d339645dc79..a8c6b285e06 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -283,7 +283,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) } DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", - (long)skb->head, (long)skb->data, (long)skb->tail, + (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), (long)skb->end); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 94f45736056..10cc13cfae6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1567,7 +1567,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) /* Trim FCS */ skb->len--; skb->tail--; - fcs = *(u8 *) skb->tail; + fcs = *(u8 *)skb_tail_pointer(skb); if (__check_fcs(skb->data, type, fcs)) { BT_ERR("bad checksum in packet"); diff --git a/net/core/dev.c b/net/core/dev.c index 6562e5736e2..86dc9f693f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1069,7 +1069,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || - skb_network_header(skb2) > skb2->tail) { + skb2->network_header > skb2->tail) { if (net_ratelimit()) printk(KERN_CRIT "protocol %04x is " "buggy, dev %s\n", @@ -1175,7 +1175,7 @@ int skb_checksum_help(struct sk_buff *skb) BUG_ON(offset > (int)skb->len); csum = skb_checksum(skb, offset, skb->len-offset, 0); - offset = skb->tail - skb_transport_header(skb); + offset = skb->tail - skb->transport_header; BUG_ON(offset <= 0); BUG_ON(skb->csum_offset + 2 > offset); diff --git a/net/core/filter.c b/net/core/filter.c index d2358a5e633..bd903aaf7aa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -46,7 +46,7 @@ static void *__load_pointer(struct sk_buff *skb, int k) else if (k >= SKF_LL_OFF) ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - if (ptr >= skb->head && ptr < skb->tail) + if (ptr >= skb->head && ptr < skb_tail_pointer(skb)) return ptr; return NULL; } diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 259473d0559..bcc25591d8a 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -61,7 +61,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, spin_lock_bh(lock); d->lock = lock; if (type) - d->tail = (struct rtattr *) skb->tail; + d->tail = (struct rtattr *)skb_tail_pointer(skb); d->skb = skb; d->compat_tc_stats = tc_stats_type; d->compat_xstats = xstats_type; @@ -212,7 +212,7 @@ int gnet_stats_finish_copy(struct gnet_dump *d) { if (d->tail) - d->tail->rta_len = d->skb->tail - (u8 *) d->tail; + d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail; if (d->compat_tc_stats) if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9da8357addc..f9469ea530c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2357,7 +2357,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - skb_set_network_header(skb, skb->tail - skb->data); + skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); @@ -2696,7 +2696,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - skb_set_network_header(skb, skb->tail - skb->data); + skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a48b0868126..ddcbc4d10da 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -87,8 +87,9 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly; void skb_over_panic(struct sk_buff *skb, int sz, void *here) { printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%p end:%p dev:%s\n", - here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, + "data:%p tail:%#lx end:%p dev:%s\n", + here, skb->len, sz, skb->head, skb->data, + (unsigned long)skb->tail, skb->end, skb->dev ? skb->dev->name : ""); BUG(); } @@ -105,8 +106,9 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) void skb_under_panic(struct sk_buff *skb, int sz, void *here) { printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%p end:%p dev:%s\n", - here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, + "data:%p tail:%#lx end:%p dev:%s\n", + here, skb->len, sz, skb->head, skb->data, + (unsigned long)skb->tail, skb->end, skb->dev ? skb->dev->name : ""); BUG(); } @@ -167,7 +169,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(&skb->users, 1); skb->head = data; skb->data = data; - skb->tail = data; + skb_reset_tail_pointer(skb); skb->end = data + size; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -629,7 +631,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. */ - memcpy(data + nhead, skb->head, skb->tail - skb->head); + memcpy(data + nhead, skb->head, + skb->tail +#ifndef NET_SKBUFF_DATA_USES_OFFSET + - skb->head +#endif + ); memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) @@ -645,9 +652,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->head = data; skb->end = data + size; skb->data += off; - skb->tail += off; #ifndef NET_SKBUFF_DATA_USES_OFFSET - /* {transport,network,mac}_header are relative to skb->head */ + /* {transport,network,mac}_header and tail are relative to skb->head */ + skb->tail += off; skb->transport_header += off; skb->network_header += off; skb->mac_header += off; @@ -762,7 +769,7 @@ int skb_pad(struct sk_buff *skb, int pad) return 0; } - ntail = skb->data_len + pad - (skb->end - skb->tail); + ntail = skb->data_len + pad - (skb->end - skb_tail_pointer(skb)); if (likely(skb_cloned(skb) || ntail > 0)) { err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); if (unlikely(err)) @@ -863,7 +870,7 @@ done: } else { skb->len = len; skb->data_len = 0; - skb->tail = skb->data + len; + skb_set_tail_pointer(skb, len); } return 0; @@ -900,7 +907,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) * plus 128 bytes for future expansions. If we have enough * room at tail, reallocate without expansion only if skb is cloned. */ - int i, k, eat = (skb->tail + delta) - skb->end; + int i, k, eat = (skb_tail_pointer(skb) + delta) - skb->end; if (eat > 0 || skb_cloned(skb)) { if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, @@ -908,7 +915,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) return NULL; } - if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) + if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) BUG(); /* Optimization: no fragments, no reasons to preestimate @@ -1004,7 +1011,7 @@ pull_pages: skb->tail += delta; skb->data_len -= delta; - return skb->tail; + return skb_tail_pointer(skb); } /* Copy some data bits from skb to kernel buffer. */ @@ -1539,7 +1546,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb, skb1->len += skb1->data_len; skb->data_len = 0; skb->len = len; - skb->tail = skb->data + len; + skb_set_tail_pointer(skb, len); } static inline void skb_split_no_header(struct sk_buff *skb, diff --git a/net/core/wireless.c b/net/core/wireless.c index 7c6a5db544f..4a777b68e3b 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -1938,7 +1938,7 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb, { struct ifinfomsg *r; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r)); r = NLMSG_DATA(nlh); @@ -1952,7 +1952,7 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb, /* Add the wireless events in the netlink packet */ RTA_PUT(skb, IFLA_WIRELESS, event_len, event); - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 84b8c5b45fe..7404653880b 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -681,8 +681,10 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) if (scp->peer.sdn_objnum) type = 0; - skb_put(skb, dn_sockaddr2username(&scp->peer, skb->tail, type)); - skb_put(skb, dn_sockaddr2username(&scp->addr, skb->tail, 2)); + skb_put(skb, dn_sockaddr2username(&scp->peer, + skb_tail_pointer(skb), type)); + skb_put(skb, dn_sockaddr2username(&scp->addr, + skb_tail_pointer(skb), 2)); menuver = DN_MENUVER_ACC | DN_MENUVER_USR; if (scp->peer.sdn_flags & SDF_PROXY) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index bb73bf16630..9678b096b84 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1468,7 +1468,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct dn_route *rt = (struct dn_route *)skb->dst; struct rtmsg *r; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); long expires; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); @@ -1509,7 +1509,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (rt->fl.iif) RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 780a141f834..544c4554074 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -295,7 +295,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, { struct rtmsg *rtm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); rtm = NLMSG_DATA(nlh); @@ -337,13 +337,13 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, nhp->rtnh_ifindex = nh->nh_oif; if (nh->nh_gw) RTA_PUT(skb, RTA_GATEWAY, 2, &nh->nh_gw); - nhp->rtnh_len = skb->tail - (unsigned char *)nhp; + nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp; } endfor_nexthops(fi); mp_head->rta_type = RTA_MULTIPATH; - mp_head->rta_len = skb->tail - (u8*)mp_head; + mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; } - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 0e62def05a5..ceefd9dd0c9 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -33,7 +33,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp) { struct sk_buff *skb = NULL; size_t size; - unsigned char *old_tail; + sk_buff_data_t old_tail; struct nlmsghdr *nlh; unsigned char *ptr; struct nf_dn_rtmsg *rtm; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 78993dadb53..b5524f32ac2 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -366,7 +366,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, fh->cb = cb; fh->port = port; if (sock->type != SOCK_DGRAM) { - skb->tail = skb->data; + skb_reset_tail_pointer(skb); skb->len = 0; } else if (res < 0) goto out_free; diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 59a765c49cf..2b854941e06 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -595,7 +595,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, if (frag != 0) flen -= hdrlen; - if (frag_skb->tail + flen > frag_skb->end) { + if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) { printk(KERN_WARNING "%s: host decrypted and " "reassembled frame did not fit skb\n", dev->name); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index de019f9fbfe..5e5613930ff 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -21,6 +21,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; + u8 *tail; int blksize; int clen; int alen; @@ -49,12 +50,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; /* Fill padding... */ + tail = skb_tail_pointer(trailer); do { int i; for (i=0; ilen - 2; i++) - *(u8*)(trailer->tail + i) = i+1; + tail[i] = i + 1; } while (0); - *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2; + tail[clen - skb->len - 2] = (clen - skb->len) - 2; pskb_put(skb, trailer, clen - skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); @@ -62,7 +64,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph = (struct ip_esp_hdr *)(skb_network_header(skb) + top_iph->ihl * 4); top_iph->tot_len = htons(skb->len + alen); - *(u8*)(trailer->tail - 1) = top_iph->protocol; + *(skb_tail_pointer(skb) - 1) = top_iph->protocol; /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8372f8b8f0c..d38cbba92a4 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -450,7 +450,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) */ iph = ip_hdr(skb_in); - if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail) + if ((u8 *)iph < skb_in->head || + (skb_in->network_header + sizeof(*iph)) > skb_in->tail) goto out; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 1fc637fb675..2506021c293 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -348,8 +348,8 @@ static int igmpv3_sendpack(struct sk_buff *skb) { struct iphdr *pip = ip_hdr(skb); struct igmphdr *pig = igmp_hdr(skb); - const int iplen = skb->tail - skb_network_header(skb); - const int igmplen = skb->tail - skb_transport_header(skb); + const int iplen = skb->tail - skb->network_header; + const int igmplen = skb->tail - skb->transport_header; pip->tot_len = htons(iplen); ip_send_check(pip); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5df71cd08da..37362cd1d07 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -60,7 +60,7 @@ static int inet_csk_diag_fill(struct sock *sk, struct nlmsghdr *nlh; void *info = NULL; struct inet_diag_meminfo *minfo = NULL; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); const struct inet_diag_handler *handler; handler = inet_diag_table[unlh->nlmsg_type]; @@ -147,7 +147,7 @@ static int inet_csk_diag_fill(struct sock *sk, icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) icsk->icsk_ca_ops->get_info(sk, ext, skb); - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: @@ -163,7 +163,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, { long tmo; struct inet_diag_msg *r; - const unsigned char *previous_tail = skb->tail; + const unsigned char *previous_tail = skb_tail_pointer(skb); struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); @@ -205,7 +205,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, &tw6->tw_v6_daddr); } #endif - nlh->nlmsg_len = skb->tail - previous_tail; + nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; return skb->len; nlmsg_failure: skb_trim(skb, previous_tail - skb->data); @@ -535,7 +535,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *inet = inet_sk(sk); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; @@ -574,7 +574,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, &inet6_rsk(req)->rmt_addr); } #endif - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index fcb35cd5ccf..c199d231173 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -316,7 +316,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); serr->port = port; - __skb_pull(skb, skb->tail - skb->data); + __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb)) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 50d0b301380..ea0a491dce9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -513,7 +513,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { - nlh->nlmsg_len = skb->tail - (u8*)nlh; + nlh->nlmsg_len = (skb_tail_pointer(skb) - + (u8 *)nlh); } else { nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); @@ -580,7 +581,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) * Copy the IP header */ - skb_set_network_header(skb, skb->tail - skb->data); + skb->network_header = skb->tail; skb_put(skb, ihl); memcpy(skb->data,pkt->data,ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ @@ -1544,7 +1545,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) int ct; struct rtnexthop *nhp; struct net_device *dev = vif_table[c->mfc_parent].dev; - u8 *b = skb->tail; + u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; if (dev) @@ -1564,7 +1565,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) } } mp_head->rta_type = RTA_MULTIPATH; - mp_head->rta_len = skb->tail - (u8*)mp_head; + mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; rtm->rtm_type = RTN_MULTICAST; return 1; diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 25bd6896730..344ddbbdc75 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -162,7 +162,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, iph = ip_hdr(*pskb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); data = (char *)th + (th->doff << 2); - data_limit = (*pskb)->tail; + data_limit = skb_tail_pointer(*pskb); if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, @@ -269,7 +269,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, the length of the header in 32-bit multiples, it is accurate to calculate data address by th+HLEN*4 */ data = data_start = (char *)th + (th->doff << 2); - data_limit = (*pskb)->tail; + data_limit = skb_tail_pointer(*pskb); while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b4450f1ccc1..6298d404e7c 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -37,28 +37,28 @@ target(struct sk_buff **pskb, /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > (**pskb).tail)) + (arpptr + hln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > (**pskb).tail)) + (arpptr + pln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > (**pskb).tail)) + (arpptr + hln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > (**pskb).tail)) + (arpptr + pln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, &mangle->u_t.tgt_ip, pln); } diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5842f1aa973..15e0d200223 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -191,7 +191,7 @@ ipq_flush(int verdict) static struct sk_buff * ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) { - unsigned char *old_tail; + sk_buff_data_t old_tail; size_t size = 0; size_t data_len = 0; struct sk_buff *skb; @@ -235,7 +235,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) if (!skb) goto nlmsg_failure; - old_tail= skb->tail; + old_tail = skb->tail; nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); pmsg = NLMSG_DATA(nlh); memset(pmsg, 0, sizeof(*pmsg)); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index c2c92ff1278..8a40fbe842b 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -92,7 +92,8 @@ static void mangle_contents(struct sk_buff *skb, /* move post-replacement */ memmove(data + match_offset + rep_len, data + match_offset + match_len, - skb->tail - (data + match_offset + match_len)); + skb->tail - (skb->network_header + dataoff + + match_offset + match_len)); /* insert data from buffer */ memcpy(data + match_offset, rep_buffer, rep_len); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2b214cc3724..18a09a78ca0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2231,7 +2231,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) th->cwr = 0; } while (skb->next); - delta = htonl(oldlen + (skb->tail - skb_transport_header(skb)) + + delta = htonl(oldlen + (skb->tail - skb->transport_header) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 29c53fbb220..c22cdcd8432 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -733,7 +733,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) } skb_shinfo(skb)->nr_frags = k; - skb->tail = skb->data; + skb_reset_tail_pointer(skb); skb->data_len -= len; skb->len = skb->data_len; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index f16f4f0c581..4a355fea409 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -268,7 +268,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); serr->port = fl->fl_ip_dport; - __skb_pull(skb, skb->tail - skb->data); + __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb)) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 7fdf84dee73..b8e8914cc00 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -51,6 +51,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int clen; int alen; int nfrags; + u8 *tail; struct esp_data *esp = x->data; int hdr_len = (skb_transport_offset(skb) + sizeof(*esph) + esp->conf.ivlen); @@ -78,18 +79,19 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } /* Fill padding... */ + tail = skb_tail_pointer(trailer); do { int i; for (i=0; ilen - 2; i++) - *(u8*)(trailer->tail + i) = i+1; + tail[i] = i + 1; } while (0); - *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2; + tail[clen-skb->len - 2] = (clen - skb->len) - 2; pskb_put(skb, trailer, clen - skb->len); top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len); esph = (struct ipv6_esp_hdr *)skb_transport_header(skb); top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph)); - *(u8 *)(trailer->tail - 1) = *skb_network_header(skb); + *(skb_tail_pointer(skb) - 1) = *skb_network_header(skb); *skb_network_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a6a275db88c..275d2e812a4 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -51,7 +51,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - nh; + int packet_len = skb->tail - skb->network_header; struct ipv6_opt_hdr *hdr; int len; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d3edc3cf1ce..e94992ab92e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -317,7 +317,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, int hlimit, tclass; int err = 0; - if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail) + if ((u8 *)hdr < skb->head || + (skb->network_header + sizeof(*hdr)) > skb->tail) return; /* diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b2c092c6b9d..e2b8db6b9ae 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -514,7 +514,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) u16 offset = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb_network_header(skb); + unsigned int packet_len = skb->tail - skb->network_header; int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 4c45bcce75e..6c2758951d6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1423,7 +1423,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); - skb_set_transport_header(skb, skb->tail - skb->data); + skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); skb_put(skb, sizeof(*pmr)); pmr = (struct mld2_report *)skb_transport_header(skb); pmr->type = ICMPV6_MLD2_REPORT; @@ -1468,8 +1468,8 @@ static void mld_sendpack(struct sk_buff *skb) int err; IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); - payload_len = skb->tail - skb_network_header(skb) - sizeof(*pip6); - mldlen = skb->tail - skb_transport_header(skb); + payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); + mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f0288e92fb5..6ed763ee678 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -260,7 +260,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - nh; + unsigned int packet_len = skb->tail - skb->network_header; int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; @@ -392,7 +392,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - nh; + unsigned int packet_len = skb->tail - skb->network_header; int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f8e619772fb..b1cf7081647 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -492,7 +492,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, skb_reserve(skb, LL_RESERVED_SPACE(dev)); ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len); - skb_set_transport_header(skb, skb->tail - skb->data); + skb->transport_header = skb->tail; skb_put(skb, len); msg = (struct nd_msg *)skb_transport_header(skb); @@ -584,7 +584,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, skb_reserve(skb, LL_RESERVED_SPACE(dev)); ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); - skb_set_transport_header(skb, skb->tail - skb->data); + skb->transport_header = skb->tail; skb_put(skb, len); msg = (struct nd_msg *)skb_transport_header(skb); msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION; @@ -685,7 +685,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, skb_reserve(skb, LL_RESERVED_SPACE(dev)); ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); - skb_set_transport_header(skb, skb->tail - skb->data); + skb->transport_header = skb->tail; skb_put(skb, len); hdr = icmp6_hdr(skb); hdr->icmp6_type = NDISC_ROUTER_SOLICITATION; @@ -767,7 +767,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - msg->opt; + u32 ndoptlen = skb->tail - (skb->transport_header + + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_ifaddr *ifp; @@ -945,7 +946,8 @@ static void ndisc_recv_na(struct sk_buff *skb) struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - msg->opt; + u32 ndoptlen = skb->tail - (skb->transport_header + + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_ifaddr *ifp; @@ -1111,8 +1113,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) __u8 * opt = (__u8 *)(ra_msg + 1); - optlen = (skb->tail - skb_transport_header(skb)) - - sizeof(struct ra_msg); + optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK2(KERN_WARNING @@ -1361,7 +1362,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - optlen = skb->tail - skb_transport_header(skb); + optlen = skb->tail - skb->transport_header; optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); if (optlen < 0) { @@ -1522,7 +1523,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, IPPROTO_ICMPV6, len); - skb_set_transport_header(buff, buff->tail - buff->data); + skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data); skb_put(buff, len); icmph = icmp6_hdr(buff); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 66a2c413525..5cfce218c5e 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -189,7 +189,7 @@ ipq_flush(int verdict) static struct sk_buff * ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) { - unsigned char *old_tail; + sk_buff_data_t old_tail; size_t size = 0; size_t data_len = 0; struct sk_buff *skb; @@ -233,7 +233,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) if (!skb) goto nlmsg_failure; - old_tail= skb->tail; + old_tail = skb->tail; nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); pmsg = NLMSG_DATA(nlh); memset(pmsg, 0, sizeof(*pmsg)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8705f6a502d..2b3be68b70a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1077,7 +1077,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) - amount = skb->tail - skb_transport_header(skb); + amount = skb->tail - skb->transport_header; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index 01d7c9c7b3b..e5e4792a031 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -133,8 +133,8 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush) * Inserting is a little bit tricky since we don't know how much * room we will need. But this should hopefully work OK */ - count = irda_param_insert(self, pi, skb->tail, skb_tailroom(skb), - &ircomm_param_info); + count = irda_param_insert(self, pi, skb_tail_pointer(skb), + skb_tailroom(skb), &ircomm_param_info); if (count < 0) { IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__); spin_unlock_irqrestore(&self->spinlock, flags); diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index fcf9d659962..ed69773b0f8 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1039,7 +1039,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, } /* Insert at end of sk-buffer */ - frame = skb->tail; + frame = skb_tail_pointer(skb); /* Make space for data */ if (skb_tailroom(skb) < (param_len+value_len+3)) { diff --git a/net/irda/qos.c b/net/irda/qos.c index 349012c926b..aeb18cf1dca 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -469,49 +469,49 @@ int irlap_insert_qos_negotiation_params(struct irlap_cb *self, int ret; /* Insert data rate */ - ret = irda_param_insert(self, PI_BAUD_RATE, skb->tail, + ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb), skb_tailroom(skb), &irlap_param_info); if (ret < 0) return ret; skb_put(skb, ret); /* Insert max turnaround time */ - ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb->tail, + ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb), skb_tailroom(skb), &irlap_param_info); if (ret < 0) return ret; skb_put(skb, ret); /* Insert data size */ - ret = irda_param_insert(self, PI_DATA_SIZE, skb->tail, + ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb), skb_tailroom(skb), &irlap_param_info); if (ret < 0) return ret; skb_put(skb, ret); /* Insert window size */ - ret = irda_param_insert(self, PI_WINDOW_SIZE, skb->tail, + ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb), skb_tailroom(skb), &irlap_param_info); if (ret < 0) return ret; skb_put(skb, ret); /* Insert additional BOFs */ - ret = irda_param_insert(self, PI_ADD_BOFS, skb->tail, + ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb), skb_tailroom(skb), &irlap_param_info); if (ret < 0) return ret; skb_put(skb, ret); /* Insert minimum turnaround time */ - ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb->tail, + ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb), skb_tailroom(skb), &irlap_param_info); if (ret < 0) return ret; skb_put(skb, ret); /* Insert link disconnect/threshold time */ - ret = irda_param_insert(self, PI_LINK_DISC, skb->tail, + ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb), skb_tailroom(skb), &irlap_param_info); if (ret < 0) return ret; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 48f05314ebf..442300c633d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -268,9 +268,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nfattr *nest_parms; - unsigned char *b; - - b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); event |= NFNL_SUBSYS_CTNETLINK << 8; nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); @@ -303,7 +301,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, ctnetlink_dump_use(skb, ct) < 0) goto nfattr_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: @@ -322,7 +320,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, struct nf_conn *ct = (struct nf_conn *)ptr; struct sk_buff *skb; unsigned int type; - unsigned char *b; + sk_buff_data_t b; unsigned int flags = 0, group; /* ignore our fake conntrack entry */ @@ -1152,9 +1150,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned char *b; - - b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); @@ -1168,7 +1164,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nfattr_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: @@ -1186,7 +1182,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr; struct sk_buff *skb; unsigned int type; - unsigned char *b; + sk_buff_data_t b; int flags = 0; if (events & IPEXP_NEW) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 5eeebd2efa7..9709f94787f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -409,15 +409,14 @@ __build_packet_message(struct nfulnl_instance *inst, const struct nf_loginfo *li, const char *prefix, unsigned int plen) { - unsigned char *old_tail; struct nfulnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; __be32 tmp_uint; + sk_buff_data_t old_tail = inst->skb->tail; UDEBUG("entered\n"); - old_tail = inst->skb->tail; nlh = NLMSG_PUT(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, sizeof(struct nfgenmsg)); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cfbee39f61d..b6585caa431 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -338,7 +338,7 @@ static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nfqnl_queue_entry *entry, int *errp) { - unsigned char *old_tail; + sk_buff_data_t old_tail; size_t size; size_t data_len = 0; struct sk_buff *skb; @@ -404,7 +404,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (!skb) goto nlmsg_failure; - old_tail= skb->tail; + old_tail = skb->tail; nlh = NLMSG_PUT(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg)); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 50dc5edb775..fdb6eb13cbc 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -785,7 +785,7 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb, skb_orphan(skb); - delta = skb->end - skb->tail; + delta = skb->end - skb_tail_pointer(skb); if (delta * 2 < skb->truesize) return skb; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 51c059b09a3..36388b2f32f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -775,7 +775,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, err = -EINVAL; res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (sock->type != SOCK_DGRAM) { - skb->tail = skb->data; + skb_reset_tail_pointer(skb); skb->len = 0; } else if (res < 0) goto out_free; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cb21617a567..28326fb1fc4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -93,7 +93,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, continue; a->priv = p; a->order = n_i; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, a->order, 0, NULL); err = tcf_action_dump_1(skb, a, 0, 0); if (err < 0) { @@ -101,7 +101,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, skb_trim(skb, (u8*)r - skb->data); goto done; } - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; n_i++; if (n_i >= TCA_ACT_MAX_PRIO) goto done; @@ -125,7 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, struct rtattr *r ; int i= 0, n_i = 0; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, a->order, 0, NULL); RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); for (i = 0; i < (hinfo->hmask + 1); i++) { @@ -140,7 +140,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, } } RTA_PUT(skb, TCA_FCNT, 4, &n_i); - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; return n_i; rtattr_failure: @@ -423,7 +423,7 @@ int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { int err = -EINVAL; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *r; if (a->ops == NULL || a->ops->dump == NULL) @@ -432,10 +432,10 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); if (tcf_action_copy_stats(skb, a, 0)) goto rtattr_failure; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; return err; } @@ -449,17 +449,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) { struct tc_action *a; int err = -EINVAL; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *r ; while ((a = act) != NULL) { - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); act = a->next; RTA_PUT(skb, a->order, 0, NULL); err = tcf_action_dump_1(skb, a, bind, ref); if (err < 0) goto errout; - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; } return 0; @@ -635,7 +635,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, { struct tcamsg *t; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *x; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); @@ -645,15 +645,15 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr*) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); if (tcf_action_dump(skb, a, bind, ref) < 0) goto rtattr_failure; - x->rta_len = skb->tail - (u8*)x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: @@ -767,7 +767,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) return -ENOBUFS; } - b = (unsigned char *)skb->tail; + b = skb_tail_pointer(skb); if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) goto err_out; @@ -783,16 +783,16 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr *) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); if (err < 0) goto rtattr_failure; - x->rta_len = skb->tail - (u8 *) x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); @@ -884,7 +884,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, if (!skb) return -ENOBUFS; - b = (unsigned char *)skb->tail; + b = skb_tail_pointer(skb); nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); t = NLMSG_DATA(nlh); @@ -892,15 +892,15 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr*) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); if (tcf_action_dump(skb, a, 0, 0) < 0) goto rtattr_failure; - x->rta_len = skb->tail - (u8*)x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); @@ -1015,7 +1015,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *x; struct tc_action_ops *a_o; struct tc_action a; @@ -1048,7 +1048,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr *) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); ret = a_o->walk(skb, cb, RTM_GETACTION, &a); @@ -1056,12 +1056,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) goto rtattr_failure; if (ret > 0) { - x->rta_len = skb->tail - (u8 *) x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; ret = skb->len; } else skb_trim(skb, (u8*)x - skb->data); - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).pid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 87d0faf3286..aad748b3b38 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -155,7 +155,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_gact opt; struct tcf_gact *gact = a->priv; struct tcf_t t; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 47f0b132423..2ccfd5b20fa 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -245,7 +245,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_ipt *ipt = a->priv; struct ipt_entry_target *t; struct tcf_t tm; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3e93683e9ab..15f6ecdaf61 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -206,7 +206,7 @@ bad_mirred: static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = a->priv; struct tc_mirred opt; struct tcf_t t; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 20813eee8af..d654cea1a46 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -195,7 +195,7 @@ done: static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_pedit *p = a->priv; struct tc_pedit *opt; struct tcf_t t; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 10a5a5c36f7..068b2376366 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -80,7 +80,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c continue; a->priv = p; a->order = index; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, a->order, 0, NULL); if (type == RTM_DELACTION) err = tcf_action_dump_1(skb, a, 0, 1); @@ -91,7 +91,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c skb_trim(skb, (u8*)r - skb->data); goto done; } - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; n_i++; } } @@ -326,7 +326,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = a->priv; struct tc_police opt; @@ -572,7 +572,7 @@ EXPORT_SYMBOL(tcf_police); int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_police opt; opt.index = police->tcf_index; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index c7971182af0..ecbcfa59b76 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -155,7 +155,7 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind) static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = a->priv; struct tc_defact opt; struct tcf_t t; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5c6ffdb77d2..84231baf77d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -323,7 +323,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, { struct tcmsg *tcm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); @@ -340,7 +340,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) goto rtattr_failure; } - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: @@ -563,30 +563,30 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ - struct rtattr * p_rta = (struct rtattr*) skb->tail; + struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); if (exts->action->type != TCA_OLD_COMPAT) { RTA_PUT(skb, map->action, 0, NULL); if (tcf_action_dump(skb, exts->action, 0, 0) < 0) goto rtattr_failure; - p_rta->rta_len = skb->tail - (u8*)p_rta; + p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } else if (map->police) { RTA_PUT(skb, map->police, 0, NULL); if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) goto rtattr_failure; - p_rta->rta_len = skb->tail - (u8*)p_rta; + p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } } #elif defined CONFIG_NET_CLS_POLICE if (map->police && exts->police) { - struct rtattr * p_rta = (struct rtattr*) skb->tail; + struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, map->police, 0, NULL); if (tcf_police_dump(skb, exts->police) < 0) goto rtattr_failure; - p_rta->rta_len = skb->tail - (u8*)p_rta; + p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } #endif return 0; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4a91f082a81..800ec2ac326 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -245,7 +245,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct basic_filter *f = (struct basic_filter *) fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (f == NULL) @@ -263,7 +263,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto rtattr_failure; - rta->rta_len = (skb->tail - b); + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 5dbb9d451f7..f5f355852a8 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -348,7 +348,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, { struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f = (struct fw_filter*)fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (f == NULL) @@ -374,7 +374,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index abc47cc48ad..1f94df36239 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -562,7 +562,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct route4_filter *f = (struct route4_filter*)fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; u32 id; @@ -591,7 +591,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0) goto rtattr_failure; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 6f373b020eb..87ed6f3c507 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -593,7 +593,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, { struct rsvp_filter *f = (struct rsvp_filter*)fh; struct rsvp_session *s; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_rsvp_pinfo pinfo; @@ -623,7 +623,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0) goto rtattr_failure; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 7563fdcef4b..0537d6066b4 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -448,7 +448,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, { struct tcindex_data *p = PRIV(tp); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n", @@ -463,7 +463,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift); RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through), &p->fall_through); - rta->rta_len = skb->tail-b; + rta->rta_len = skb_tail_pointer(skb) - b; } else { if (p->perfect) { t->tcm_handle = r-p->perfect; @@ -486,7 +486,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail-b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) goto rtattr_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 695b34051b9..fa11bb75004 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -213,7 +213,7 @@ check_terminal: off2 = 0; } - if (ptr < skb->tail) + if (ptr < skb_tail_pointer(skb)) goto next_ht; } @@ -718,7 +718,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode*)fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (n == NULL) @@ -765,7 +765,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, #endif } - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (TC_U32_KEY(n->handle)) if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0) goto rtattr_failure; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 959c306c571..63146d339d8 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -418,17 +418,19 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree) int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) { int i; - struct rtattr * top_start = (struct rtattr*) skb->tail; - struct rtattr * list_start; + u8 *tail; + struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb); + struct rtattr *list_start; RTA_PUT(skb, tlv, 0, NULL); RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr); - list_start = (struct rtattr *) skb->tail; + list_start = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL); + tail = skb_tail_pointer(skb); for (i = 0; i < tree->hdr.nmatches; i++) { - struct rtattr *match_start = (struct rtattr*) skb->tail; + struct rtattr *match_start = (struct rtattr *)tail; struct tcf_ematch *em = tcf_em_get_match(tree, i); struct tcf_ematch_hdr em_hdr = { .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER, @@ -447,11 +449,12 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) } else if (em->datalen > 0) RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data); - match_start->rta_len = skb->tail - (u8*) match_start; + tail = skb_tail_pointer(skb); + match_start->rta_len = tail - (u8 *)match_start; } - list_start->rta_len = skb->tail - (u8 *) list_start; - top_start->rta_len = skb->tail - (u8 *) top_start; + list_start->rta_len = tail - (u8 *)list_start; + top_start->rta_len = tail - (u8 *)top_start; return 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ecbdc6b42a9..7482a950717 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -813,7 +813,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, { struct tcmsg *tcm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); @@ -847,7 +847,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, if (gnet_stats_finish_copy(&d) < 0) goto rtattr_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: @@ -1051,7 +1051,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, { struct tcmsg *tcm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; @@ -1076,7 +1076,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, if (gnet_stats_finish_copy(&d) < 0) goto rtattr_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index baca8743c12..1d7bb163213 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -631,7 +631,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow = (struct atm_flow_data *) cl; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", @@ -661,7 +661,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero); } - rta->rta_len = skb->tail-b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d83414d828d..be98a01253e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1465,7 +1465,7 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate); return skb->len; @@ -1477,7 +1477,7 @@ rtattr_failure: static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_lssopt opt; opt.flags = 0; @@ -1502,7 +1502,7 @@ rtattr_failure: static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; opt.flags = 0; @@ -1520,7 +1520,7 @@ rtattr_failure: static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_ovl opt; opt.strategy = cl->ovl_strategy; @@ -1537,7 +1537,7 @@ rtattr_failure: static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_fopt opt; if (cl->split || cl->defmap) { @@ -1556,7 +1556,7 @@ rtattr_failure: #ifdef CONFIG_NET_CLS_POLICE static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_police opt; if (cl->police) { @@ -1590,14 +1590,14 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct cbq_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; rta = (struct rtattr*)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if (cbq_dump_attr(skb, &q->link) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: @@ -1619,7 +1619,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct cbq_class *cl = (struct cbq_class*)arg; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (cl->tparent) @@ -1633,7 +1633,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if (cbq_dump_attr(skb, cl) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5197b6caaf2..80e6f811e3b 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1363,7 +1363,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct hfsc_class *cl = (struct hfsc_class *)arg; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta = (struct rtattr *)b; tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; @@ -1374,7 +1374,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if (hfsc_dump_curves(skb, cl) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: @@ -1576,7 +1576,7 @@ static int hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) { struct hfsc_sched *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; qopt.defcls = q->defcls; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index f76c20c0a10..c687388a8cb 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1110,7 +1110,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { struct htb_sched *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_htb_glob gopt; spin_lock_bh(&sch->dev->queue_lock); @@ -1123,12 +1123,12 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) rta = (struct rtattr *)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: spin_unlock_bh(&sch->dev->queue_lock); - skb_trim(skb, skb->tail - skb->data); + skb_trim(skb, skb_tail_pointer(skb) - skb->data); return -1; } @@ -1136,7 +1136,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_htb_opt opt; @@ -1159,7 +1159,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.prio = cl->un.leaf.prio; opt.level = cl->level; RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index cfe070ee6ee..d19f4070c23 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -362,12 +362,12 @@ static void ingress_destroy(struct Qdisc *sch) static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; rta = (struct rtattr *) b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 915f82a2cc3..2a9b1e429ff 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -583,7 +583,7 @@ static void netem_destroy(struct Qdisc *sch) static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) { const struct netem_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta = (struct rtattr *) b; struct tc_netem_qopt qopt; struct tc_netem_corr cor; @@ -611,7 +611,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) corrupt.correlation = q->corrupt_cor.rho; RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index de889f23f22..5b371109ec1 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -271,7 +271,7 @@ static int prio_init(struct Qdisc *sch, struct rtattr *opt) static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_prio_qopt opt; opt.bands = q->bands; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index e3695407afc..a511ba83e26 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -461,7 +461,7 @@ static void sfq_destroy(struct Qdisc *sch) static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_sfq_qopt opt; opt.quantum = q->quantum; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index f14692f3a14..231895562c6 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -387,7 +387,7 @@ static void tbf_destroy(struct Qdisc *sch) static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tbf_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_tbf_qopt opt; @@ -403,7 +403,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) opt.mtu = q->mtu; opt.buffer = q->buffer; RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; diff --git a/net/sctp/input.c b/net/sctp/input.c index 1ff47b18724..18b97eedc1f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -612,7 +612,7 @@ int sctp_rcv_ootb(struct sk_buff *skb) break; ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); - if (ch_end > skb->tail) + if (ch_end > skb_tail_pointer(skb)) break; /* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the @@ -644,7 +644,7 @@ int sctp_rcv_ootb(struct sk_buff *skb) } ch = (sctp_chunkhdr_t *) ch_end; - } while (ch_end < skb->tail); + } while (ch_end < skb_tail_pointer(skb)); return 0; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index c30629e1778..88aa2240754 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -159,16 +159,16 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) * the skb->tail. */ if (unlikely(skb_is_nonlinear(chunk->skb))) { - if (chunk->chunk_end > chunk->skb->tail) - chunk->chunk_end = chunk->skb->tail; + if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) + chunk->chunk_end = skb_tail_pointer(chunk->skb); } skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ - if (chunk->chunk_end < chunk->skb->tail) { + if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; - } else if (chunk->chunk_end > chunk->skb->tail) { + } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { /* RFC 2960, Section 6.10 Bundling * * Partial chunks MUST NOT be placed in an SCTP packet. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 60c5b59d4c6..759ea3d1997 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1143,7 +1143,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + padlen + len); - chunk->chunk_end = chunk->skb->tail; + chunk->chunk_end = skb_tail_pointer(chunk->skb); return target; } @@ -1168,7 +1168,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len, /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(ntohs(chunk->chunk_hdr->length) + len); - chunk->chunk_end = chunk->skb->tail; + chunk->chunk_end = skb_tail_pointer(chunk->skb); out: return err; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index bf502c499c8..438e5dc5c71 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3115,7 +3115,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, break; ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); - if (ch_end > skb->tail) + if (ch_end > skb_tail_pointer(skb)) break; if (SCTP_CID_SHUTDOWN_ACK == ch->type) @@ -3130,7 +3130,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, return sctp_sf_pdiscard(ep, asoc, type, arg, commands); ch = (sctp_chunkhdr_t *) ch_end; - } while (ch_end < skb->tail); + } while (ch_end < skb_tail_pointer(skb)); if (ootb_shut_ack) sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); diff --git a/net/tipc/config.c b/net/tipc/config.c index 14789a82de5..c71337a22d3 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -89,7 +89,7 @@ struct sk_buff *tipc_cfg_reply_alloc(int payload_size) int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, void *tlv_data, int tlv_data_size) { - struct tlv_desc *tlv = (struct tlv_desc *)buf->tail; + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf); int new_tlv_space = TLV_SPACE(tlv_data_size); if (skb_tailroom(buf) < new_tlv_space) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b71739fbe2c..45832fb75ea 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1020,7 +1020,7 @@ restart: if (!err) { buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); - sz = buf->tail - buf_crs; + sz = skb_tail_pointer(buf) - buf_crs; needed = (buf_len - sz_copied); sz_to_copy = (sz <= needed) ? sz : needed; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 816e3690b60..814bb3125ad 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -576,7 +576,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct sk_buff *skb = sp->out_skb; struct xfrm_usersa_info *p; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); if (sp->this_idx < sp->start_idx) goto out; @@ -621,7 +621,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) if (x->lastused) RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; out: sp->this_idx++; return 0; @@ -1157,7 +1157,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); if (sp->this_idx < sp->start_idx) goto out; @@ -1176,7 +1176,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; out: sp->this_idx++; return 0; @@ -1330,7 +1330,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve struct xfrm_aevent_id *id; struct nlmsghdr *nlh; struct xfrm_lifetime_cur ltime; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id)); id = NLMSG_DATA(nlh); @@ -1362,7 +1362,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer); } - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: @@ -1744,7 +1744,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); int i; nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id)); @@ -1764,7 +1764,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, goto nlmsg_failure; } - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: skb_trim(skb, b - skb->data); @@ -1942,7 +1942,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue)); @@ -1952,7 +1952,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: @@ -1999,7 +1999,7 @@ static int xfrm_notify_sa_flush(struct km_event *c) struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; + sk_buff_data_t b; int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)); skb = alloc_skb(len, GFP_ATOMIC); @@ -2045,7 +2045,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; + sk_buff_data_t b; int len = xfrm_sa_len(x); int headlen; @@ -2129,7 +2129,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, { struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); __u32 seq = xfrm_get_acqseq(); nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, @@ -2153,7 +2153,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: @@ -2249,7 +2249,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, struct xfrm_user_polexpire *upe; struct nlmsghdr *nlh; int hard = c->data.hard; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe)); upe = NLMSG_DATA(nlh); @@ -2264,7 +2264,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; upe->hard = !!hard; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: @@ -2300,7 +2300,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; + sk_buff_data_t b; int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); int headlen; @@ -2357,7 +2357,7 @@ static int xfrm_notify_policy_flush(struct km_event *c) { struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; + sk_buff_data_t b; int len = 0; #ifdef CONFIG_XFRM_SUB_POLICY len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); @@ -2410,7 +2410,7 @@ static int build_report(struct sk_buff *skb, u8 proto, { struct xfrm_user_report *ur; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); ur = NLMSG_DATA(nlh); @@ -2422,7 +2422,7 @@ static int build_report(struct sk_buff *skb, u8 proto, if (addr) RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index e203883406d..33f2e064a68 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -66,7 +66,7 @@ static void selnl_add_payload(struct nlmsghdr *nlh, int len, int msgtype, void * static void selnl_notify(int msgtype, void *data) { int len; - unsigned char *tmp; + sk_buff_data_t tmp; struct sk_buff *skb; struct nlmsghdr *nlh; -- cgit v1.2.3-70-g09d2 From b529ccf2799c14346d1518e9bdf1f88f03643e99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 25 Apr 2007 19:08:35 -0700 Subject: [NETLINK]: Introduce nlmsg_hdr() helper For the common "(struct nlmsghdr *)skb->data" sequence, so that we reduce the number of direct accesses to skb->data and for consistency with all the other cast skb member helpers. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/connector/connector.c | 2 +- drivers/scsi/scsi_netlink.c | 2 +- drivers/scsi/scsi_transport_iscsi.c | 2 +- fs/ecryptfs/netlink.c | 4 ++-- include/linux/netlink.h | 5 +++++ kernel/audit.c | 6 +++--- kernel/taskstats.c | 4 ++-- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- net/netlink/af_netlink.c | 2 +- net/tipc/netlink.c | 2 +- security/selinux/hooks.c | 2 +- 15 files changed, 23 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index a905f782033..7f9c4fb7e5b 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -212,7 +212,7 @@ static void cn_rx_skb(struct sk_buff *__skb) skb = skb_get(__skb); if (skb->len >= NLMSG_SPACE(0)) { - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(struct cn_msg) || skb->len < nlh->nlmsg_len || diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 1b59b27e887..45646a28524 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -50,7 +50,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb) while (skb->len >= NLMSG_SPACE(0)) { err = 0; - nlh = (struct nlmsghdr *) skb->data; + nlh = nlmsg_hdr(skb); if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) || (skb->len < nlh->nlmsg_len)) { printk(KERN_WARNING "%s: discarding partial skb\n", diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index ce0d14af33c..10590cd7e9e 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1081,7 +1081,7 @@ iscsi_if_rx(struct sock *sk, int len) struct nlmsghdr *nlh; struct iscsi_uevent *ev; - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) { break; diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index e3aa2253c85..8405d216a5f 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -97,7 +97,7 @@ out: */ static int ecryptfs_process_nl_response(struct sk_buff *skb) { - struct nlmsghdr *nlh = (struct nlmsghdr*)skb->data; + struct nlmsghdr *nlh = nlmsg_hdr(skb); struct ecryptfs_message *msg = NLMSG_DATA(nlh); int rc; @@ -181,7 +181,7 @@ receive: "rc = [%d]\n", rc); return; } - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); if (!NLMSG_OK(nlh, skb->len)) { ecryptfs_printk(KERN_ERR, "Received corrupt netlink " "message\n"); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 68a632b372e..36629fff26d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -138,6 +138,11 @@ struct nlattr #include #include +static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) +{ + return (struct nlmsghdr *)skb->data; +} + struct netlink_skb_parms { struct ucred creds; /* Skb credentials */ diff --git a/kernel/audit.c b/kernel/audit.c index ea8521417d1..80a7457dadb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -151,7 +151,7 @@ struct audit_buffer { static void audit_set_pid(struct audit_buffer *ab, pid_t pid) { - struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data; + struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); nlh->nlmsg_pid = pid; } @@ -750,7 +750,7 @@ static void audit_receive_skb(struct sk_buff *skb) u32 rlen; while (skb->len >= NLMSG_SPACE(0)) { - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; rlen = NLMSG_ALIGN(nlh->nlmsg_len); @@ -1268,7 +1268,7 @@ void audit_log_end(struct audit_buffer *ab) audit_log_lost("rate limit exceeded"); } else { if (audit_pid) { - struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data; + struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0); skb_queue_tail(&audit_skb_queue, ab->skb); ab->skb = NULL; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4c3476fa058..ad7d2392cb0 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -102,7 +102,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, */ static int send_reply(struct sk_buff *skb, pid_t pid) { - struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); + struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); int rc; @@ -121,7 +121,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid) static void send_cpu_listeners(struct sk_buff *skb, struct listener_list *listeners) { - struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); + struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index ceefd9dd0c9..9e8256a2361 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -102,7 +102,7 @@ static unsigned int dnrmg_hook(unsigned int hook, static inline void dnrmg_receive_user_skb(struct sk_buff *skb) { - struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct nlmsghdr *nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cac06c43f00..3ff753c6f19 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -807,7 +807,7 @@ static void nl_fib_input(struct sock *sk, int len) if (skb == NULL) return; - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) { kfree_skb(skb); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 37362cd1d07..238999e6e87 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -847,7 +847,7 @@ static inline void inet_diag_rcv_skb(struct sk_buff *skb) { if (skb->len >= NLMSG_SPACE(0)) { int err; - struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct nlmsghdr *nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 15e0d200223..17f7c988460 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -497,7 +497,7 @@ ipq_rcv_skb(struct sk_buff *skb) if (skblen < sizeof(*nlh)) return; - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); nlmsglen = nlh->nlmsg_len; if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) return; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5cfce218c5e..275e625e497 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -487,7 +487,7 @@ ipq_rcv_skb(struct sk_buff *skb) if (skblen < sizeof(*nlh)) return; - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); nlmsglen = nlh->nlmsg_len; if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) return; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 50dc5edb775..04b72d3c1de 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1471,7 +1471,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, int err; while (skb->len >= nlmsg_total_size(0)) { - nlh = (struct nlmsghdr *) skb->data; + nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return 0; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index b8e1edc2bad..4cdafa2d1d4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -57,7 +57,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) if (rep_buf) { skb_push(rep_buf, hdr_space); - rep_nlh = (struct nlmsghdr *)rep_buf->data; + rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index addb5850105..5f02b4be191 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3786,7 +3786,7 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) err = -EINVAL; goto out; } - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); err = selinux_nlmsg_lookup(isec->sclass, nlh->nlmsg_type, &perm); if (err) { -- cgit v1.2.3-70-g09d2 From 85795d64eddd4546375f5ee37bedd88cb5bc4ece Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 24 Mar 2007 21:35:33 -0700 Subject: [TCP] tcp_probe: improvements for net-2.6.22 Change tcp_probe to use ktime (needed to add one export). Add option to only get events when cwnd changes - from Doug Leith Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- kernel/time.c | 1 + net/ipv4/tcp_probe.c | 68 ++++++++++++++++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index a1439f421d0..ba18ec4899b 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -452,6 +452,7 @@ struct timespec ns_to_timespec(const s64 nsec) return ts; } +EXPORT_SYMBOL(ns_to_timespec); /** * ns_to_timeval - Convert nanoseconds to timeval diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 61f406f2729..3938d5dbdf2 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -34,43 +36,45 @@ MODULE_AUTHOR("Stephen Hemminger "); MODULE_DESCRIPTION("TCP cwnd snooper"); MODULE_LICENSE("GPL"); -static int port = 0; +static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); -static int bufsize = 64*1024; +static int bufsize __read_mostly = 64*1024; MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); module_param(bufsize, int, 0); +static int full __read_mostly; +MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); +module_param(full, int, 0); + static const char procname[] = "tcpprobe"; struct { - struct kfifo *fifo; - spinlock_t lock; + struct kfifo *fifo; + spinlock_t lock; wait_queue_head_t wait; - struct timeval tstart; + ktime_t start; + u32 lastcwnd; } tcpw; +/* + * Print to log with timestamps. + * FIXME: causes an extra copy + */ static void printl(const char *fmt, ...) { va_list args; int len; - struct timeval now; + struct timespec tv; char tbuf[256]; va_start(args, fmt); - do_gettimeofday(&now); + /* want monotonic time since start of tcp_probe */ + tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start)); - now.tv_sec -= tcpw.tstart.tv_sec; - now.tv_usec -= tcpw.tstart.tv_usec; - if (now.tv_usec < 0) { - --now.tv_sec; - now.tv_usec += 1000000; - } - - len = sprintf(tbuf, "%lu.%06lu ", - (unsigned long) now.tv_sec, - (unsigned long) now.tv_usec); + len = sprintf(tbuf, "%lu.%09lu ", + (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec); len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); @@ -78,38 +82,44 @@ static void printl(const char *fmt, ...) wake_up(&tcpw.wait); } -static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +/* + * Hook inserted to be called before each receive packet. + * Note: arguments must match tcp_rcv_established()! + */ +static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); - if (port == 0 || ntohs(inet->dport) == port || - ntohs(inet->sport) == port) { + /* Only update if port matches */ + if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) + && (full || tp->snd_cwnd != tcpw.lastcwnd)) { printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n", NIPQUAD(inet->saddr), ntohs(inet->sport), NIPQUAD(inet->daddr), ntohs(inet->dport), - size, tp->snd_nxt, tp->snd_una, + skb->len, tp->snd_nxt, tp->snd_una, tp->snd_cwnd, tcp_current_ssthresh(sk), - tp->snd_wnd); + tp->snd_wnd, tp->srtt >> 3); + tcpw.lastcwnd = tp->snd_cwnd; } jprobe_return(); return 0; } -static struct jprobe tcp_send_probe = { +static struct jprobe tcp_probe = { .kp = { - .symbol_name = "tcp_sendmsg", + .symbol_name = "tcp_rcv_established", }, - .entry = JPROBE_ENTRY(jtcp_sendmsg), + .entry = JPROBE_ENTRY(jtcp_rcv_established), }; static int tcpprobe_open(struct inode * inode, struct file * file) { kfifo_reset(tcpw.fifo); - do_gettimeofday(&tcpw.tstart); + tcpw.start = ktime_get(); return 0; } @@ -162,7 +172,7 @@ static __init int tcpprobe_init(void) if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) goto err0; - ret = register_jprobe(&tcp_send_probe); + ret = register_jprobe(&tcp_probe); if (ret) goto err1; @@ -180,7 +190,7 @@ static __exit void tcpprobe_exit(void) { kfifo_free(tcpw.fifo); proc_net_remove(procname); - unregister_jprobe(&tcp_send_probe); + unregister_jprobe(&tcp_probe); } module_exit(tcpprobe_exit); -- cgit v1.2.3-70-g09d2 From af65bdfce98d7965fbe93a48b8128444a2eea024 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 20 Apr 2007 14:14:21 -0700 Subject: [NETLINK]: Switch cb_lock spinlock to mutex and allow to override it Switch cb_lock to mutex and allow netlink kernel users to override it with a subsystem specific mutex for consistent locking in dump callbacks. All netlink_dump_start users have been audited not to rely on any side-effects of the previously used spinlock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/connector/connector.c | 2 +- drivers/scsi/scsi_netlink.c | 3 ++- drivers/scsi/scsi_transport_iscsi.c | 2 +- fs/ecryptfs/netlink.c | 2 +- include/linux/netlink.h | 5 ++++- kernel/audit.c | 2 +- lib/kobject_uevent.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/core/rtnetlink.c | 2 +- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/fib_frontend.c | 3 ++- net/ipv4/inet_diag.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- net/netfilter/nfnetlink.c | 2 +- net/netlink/af_netlink.c | 38 ++++++++++++++++++++++--------------- net/netlink/genetlink.c | 2 +- net/xfrm/xfrm_user.c | 2 +- security/selinux/netlink.c | 2 +- 20 files changed, 47 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 7f9c4fb7e5b..a7b9e9bb3e8 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -448,7 +448,7 @@ static int __devinit cn_init(void) dev->nls = netlink_kernel_create(NETLINK_CONNECTOR, CN_NETLINK_USERS + 0xf, - dev->input, THIS_MODULE); + dev->input, NULL, THIS_MODULE); if (!dev->nls) return -EIO; diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 45646a28524..4bf9aa547c7 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -168,7 +168,8 @@ scsi_netlink_init(void) } scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, - SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE); + SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, + THIS_MODULE); if (!scsi_nl_sock) { printk(KERN_ERR "%s: register of recieve handler failed\n", __FUNCTION__); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 10590cd7e9e..aabaa0576ab 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1435,7 +1435,7 @@ static __init int iscsi_transport_init(void) if (err) goto unregister_conn_class; - nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, + nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL, THIS_MODULE); if (!nls) { err = -ENOBUFS; diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index 8405d216a5f..fe9186312d7 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -229,7 +229,7 @@ int ecryptfs_init_netlink(void) ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0, ecryptfs_receive_nl_message, - THIS_MODULE); + NULL, THIS_MODULE); if (!ecryptfs_nl_sock) { rc = -EIO; ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n"); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 0d11f6a7389..f41688f5663 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -157,7 +157,10 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); +extern struct sock *netlink_kernel_create(int unit, unsigned int groups, + void (*input)(struct sock *sk, int len), + struct mutex *cb_mutex, + struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); diff --git a/kernel/audit.c b/kernel/audit.c index 80a7457dadb..4e9d2082968 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -795,7 +795,7 @@ static int __init audit_init(void) printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, - THIS_MODULE); + NULL, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); else diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 84272ed77f0..82fc1794b69 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -293,7 +293,7 @@ EXPORT_SYMBOL_GPL(add_uevent_var); static int __init kobject_uevent_init(void) { uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, - THIS_MODULE); + NULL, THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 8b84cd40279..9411db62591 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -302,7 +302,7 @@ static int __init ebt_ulog_init(void) } ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, - NULL, THIS_MODULE); + NULL, NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5266df33705..648a7b6d15d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -972,7 +972,7 @@ void __init rtnetlink_init(void) panic("rtnetlink_init: cannot allocate rta_buf\n"); rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - THIS_MODULE); + NULL, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 2ee47bab693..696234688cf 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,7 +138,7 @@ static int __init dn_rtmsg_init(void) int rv = 0; dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, - dnrmg_receive_user_sk, THIS_MODULE); + dnrmg_receive_user_sk, NULL, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5bf718a3e49..953dd458c23 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -827,7 +827,8 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); + netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, + THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 0148f0e34ce..dbeacd8b0f9 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -893,7 +893,7 @@ static int __init inet_diag_init(void) goto out; idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, - THIS_MODULE); + NULL, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 0d72693869e..702d94db19b 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -668,7 +668,7 @@ static int __init ip_queue_init(void) netlink_register_notifier(&ipq_nl_notifier); ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - THIS_MODULE); + NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index a2bcba70af5..23b607b33b3 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -420,7 +420,7 @@ static int __init ipt_ulog_init(void) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, - THIS_MODULE); + NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index bfae9fdc466..0004db38af6 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -657,7 +657,7 @@ static int __init ip6_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b0da853eabe..8797e6953ef 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -265,7 +265,7 @@ static int __init nfnetlink_init(void) printk("Netfilter messages via NETLINK v%s.\n", nfversion); nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, THIS_MODULE); + nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2cbf1682f63..ec16c9b7b3b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,8 @@ struct netlink_sock { unsigned long state; wait_queue_head_t wait; struct netlink_callback *cb; - spinlock_t cb_lock; + struct mutex *cb_mutex; + struct mutex cb_def_mutex; void (*data_ready)(struct sock *sk, int bytes); struct module *module; }; @@ -108,6 +110,7 @@ struct netlink_table { unsigned long *listeners; unsigned int nl_nonroot; unsigned int groups; + struct mutex *cb_mutex; struct module *module; int registered; }; @@ -370,7 +373,8 @@ static struct proto netlink_proto = { .obj_size = sizeof(struct netlink_sock), }; -static int __netlink_create(struct socket *sock, int protocol) +static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, + int protocol) { struct sock *sk; struct netlink_sock *nlk; @@ -384,7 +388,8 @@ static int __netlink_create(struct socket *sock, int protocol) sock_init_data(sock, sk); nlk = nlk_sk(sk); - spin_lock_init(&nlk->cb_lock); + nlk->cb_mutex = cb_mutex ? : &nlk->cb_def_mutex; + mutex_init(nlk->cb_mutex); init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -395,6 +400,7 @@ static int __netlink_create(struct socket *sock, int protocol) static int netlink_create(struct socket *sock, int protocol) { struct module *module = NULL; + struct mutex *cb_mutex; struct netlink_sock *nlk; int err = 0; @@ -417,9 +423,10 @@ static int netlink_create(struct socket *sock, int protocol) if (nl_table[protocol].registered && try_module_get(nl_table[protocol].module)) module = nl_table[protocol].module; + cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); - if ((err = __netlink_create(sock, protocol)) < 0) + if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0) goto out_module; nlk = nlk_sk(sock->sk); @@ -444,14 +451,14 @@ static int netlink_release(struct socket *sock) sock_orphan(sk); nlk = nlk_sk(sk); - spin_lock(&nlk->cb_lock); + mutex_lock(nlk->cb_mutex); if (nlk->cb) { if (nlk->cb->done) nlk->cb->done(nlk->cb); netlink_destroy_callback(nlk->cb); nlk->cb = NULL; } - spin_unlock(&nlk->cb_lock); + mutex_unlock(nlk->cb_mutex); /* OK. Socket is unlinked, and, therefore, no new packets will arrive */ @@ -1266,7 +1273,7 @@ static void netlink_data_ready(struct sock *sk, int len) struct sock * netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), - struct module *module) + struct mutex *cb_mutex, struct module *module) { struct socket *sock; struct sock *sk; @@ -1281,7 +1288,7 @@ netlink_kernel_create(int unit, unsigned int groups, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(sock, unit) < 0) + if (__netlink_create(sock, cb_mutex, unit) < 0) goto out_sock_release; if (groups < 32) @@ -1305,6 +1312,7 @@ netlink_kernel_create(int unit, unsigned int groups, netlink_table_grab(); nl_table[unit].groups = groups; nl_table[unit].listeners = listeners; + nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; nl_table[unit].registered = 1; netlink_table_ungrab(); @@ -1347,7 +1355,7 @@ static int netlink_dump(struct sock *sk) if (!skb) goto errout; - spin_lock(&nlk->cb_lock); + mutex_lock(nlk->cb_mutex); cb = nlk->cb; if (cb == NULL) { @@ -1358,7 +1366,7 @@ static int netlink_dump(struct sock *sk) len = cb->dump(skb, cb); if (len > 0) { - spin_unlock(&nlk->cb_lock); + mutex_unlock(nlk->cb_mutex); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); return 0; @@ -1376,13 +1384,13 @@ static int netlink_dump(struct sock *sk) if (cb->done) cb->done(cb); nlk->cb = NULL; - spin_unlock(&nlk->cb_lock); + mutex_unlock(nlk->cb_mutex); netlink_destroy_callback(cb); return 0; errout_skb: - spin_unlock(&nlk->cb_lock); + mutex_unlock(nlk->cb_mutex); kfree_skb(skb); errout: return err; @@ -1414,15 +1422,15 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, } nlk = nlk_sk(sk); /* A dump or destruction is in progress... */ - spin_lock(&nlk->cb_lock); + mutex_lock(nlk->cb_mutex); if (nlk->cb || sock_flag(sk, SOCK_DEAD)) { - spin_unlock(&nlk->cb_lock); + mutex_unlock(nlk->cb_mutex); netlink_destroy_callback(cb); sock_put(sk); return -EBUSY; } nlk->cb = cb; - spin_unlock(&nlk->cb_lock); + mutex_unlock(nlk->cb_mutex); netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index fac2e7a6dbe..6e31234a419 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -558,7 +558,7 @@ static int __init genl_init(void) netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, - genl_rcv, THIS_MODULE); + genl_rcv, NULL, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2ff968373f1..88659edc9b1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2444,7 +2444,7 @@ static int __init xfrm_user_init(void) printk(KERN_INFO "Initializing XFRM netlink socket\n"); nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, - xfrm_netlink_rcv, THIS_MODULE); + xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; rcu_assign_pointer(xfrm_nl, nlsk); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 33f2e064a68..f49046de63a 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -104,7 +104,7 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, + selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); -- cgit v1.2.3-70-g09d2 From e19dff1fdd99a25819af74cf0710e147fff4fd3a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Apr 2007 15:46:56 -0700 Subject: [AF_RXRPC]: Make it possible to merely try to cancel timers from a module Export try_to_del_timer_sync() for use by the AF_RXRPC module. Signed-off-by: David Howells Signed-off-by: David S. Miller --- kernel/timer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index dd6c2c1c561..b22bd39740d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -505,6 +505,8 @@ out: return ret; } +EXPORT_SYMBOL(try_to_del_timer_sync); + /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated -- cgit v1.2.3-70-g09d2 From 39bc89fd4019b164002adaacef92c4140e37955a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Apr 2007 20:50:03 -0700 Subject: make SysRq-T show all tasks again show_state() (SysRq-T) developed the buggy habbit of not showing TASK_RUNNING tasks. This was due to the mistaken belief that state_filter == -1 would be a pass-through filter - while in reality it did not let TASK_RUNNING == 0 p->state values through. Fix this by restoring the original '!state_filter means all tasks' special-case i had in the original version. Test-built and test-booted on i686, SysRq-T now works as intended. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++-- kernel/sched.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 49fe2997a01..a1707583de4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -196,13 +196,13 @@ extern void init_idle(struct task_struct *idle, int cpu); extern cpumask_t nohz_cpu_mask; /* - * Only dump TASK_* tasks. (-1 for all tasks) + * Only dump TASK_* tasks. (0 for all tasks) */ extern void show_state_filter(unsigned long state_filter); static inline void show_state(void) { - show_state_filter(-1); + show_state_filter(0); } extern void show_regs(struct pt_regs *); diff --git a/kernel/sched.c b/kernel/sched.c index b9a68373014..960d7c5fca3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4746,7 +4746,7 @@ void show_state_filter(unsigned long state_filter) * console might take alot of time: */ touch_nmi_watchdog(); - if (p->state & state_filter) + if (!state_filter || (p->state & state_filter)) show_task(p); } while_each_thread(g, p); -- cgit v1.2.3-70-g09d2 From a53c46dc8253cc613ad66a2ca7aad6de8b7e61b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Apr 2007 11:43:58 +0200 Subject: s2ram: add arch irq disable/enable hooks After some more discussion this patch replaces it: From: Johannes Berg Subject: suspend: add arch irq disable/enable hooks For powermac, we need to do some things between suspending devices and device_power_off, for example setting the decrementer. This patch allows architectures to define arch_s2ram_{en,dis}able_irqs in their asm/suspend.h to have control over this step. Signed-off-by: Johannes Berg Acked-by: Pavel Machek Cc: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/pm.h | 18 ++++++++++++++++++ kernel/power/main.c | 18 +++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/pm.h b/include/linux/pm.h index b0ab623adbf..9bd86db4d39 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -166,6 +166,24 @@ extern struct pm_ops *pm_ops; extern int pm_suspend(suspend_state_t state); +/** + * arch_suspend_disable_irqs - disable IRQs for suspend + * + * Disables IRQs (in the default case). This is a weak symbol in the common + * code and thus allows architectures to override it if more needs to be + * done. Not called for suspend to disk. + */ +extern void arch_suspend_disable_irqs(void); + +/** + * arch_suspend_enable_irqs - enable IRQs after suspend + * + * Enables IRQs (in the default case). This is a weak symbol in the common + * code and thus allows architectures to override it if more needs to be + * done. Not called for suspend to disk. + */ +extern void arch_suspend_enable_irqs(void); + /* * Device power management */ diff --git a/kernel/power/main.c b/kernel/power/main.c index a064dfd8877..3062e940d1f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -111,13 +111,24 @@ static int suspend_prepare(suspend_state_t state) return error; } +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_disable_irqs(void) +{ + local_irq_disable(); +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_enable_irqs(void) +{ + local_irq_enable(); +} int suspend_enter(suspend_state_t state) { int error = 0; - unsigned long flags; - local_irq_save(flags); + arch_suspend_disable_irqs(); + BUG_ON(!irqs_disabled()); if ((error = device_power_down(PMSG_SUSPEND))) { printk(KERN_ERR "Some devices failed to power down\n"); @@ -126,7 +137,8 @@ int suspend_enter(suspend_state_t state) error = pm_ops->enter(state); device_power_up(); Done: - local_irq_restore(flags); + arch_suspend_enable_irqs(); + BUG_ON(irqs_disabled()); return error; } -- cgit v1.2.3-70-g09d2 From 240936e18b75937e7866934df723c2db0011d24f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 26 Apr 2007 00:12:09 -0700 Subject: mod_sysfs_setup() doesn't return errno when kobject_add_dir() failure occurs mod_sysfs_setup() doesn't return an errno when kobject_add_dir() for module "holders" directory fails. So caller of mod_sysfs_setup() will keep going and get oops. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index dcdb32b8b13..9da5af668a2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1148,8 +1148,10 @@ int mod_sysfs_setup(struct module *mod, goto out; mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders"); - if (!mod->holders_dir) + if (!mod->holders_dir) { + err = -ENOMEM; goto out_unreg; + } err = module_param_sysfs_setup(mod, kparam, num_params); if (err) -- cgit v1.2.3-70-g09d2 From b8b8fd2dc23725fba77f66b3fef11b11f983fc08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Apr 2007 15:31:24 -0700 Subject: [NET]: Fix networking compilation errors Fix miscellaneous networking compilation errors. (*) Export ktime_add_ns() for modules. (*) wext_proc_init() should have an ANSI declaration. Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/net/wext.h | 2 +- kernel/hrtimer.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/net/wext.h b/include/net/wext.h index 55741836a67..c02b8decf3a 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -10,7 +10,7 @@ extern int wext_proc_init(void); extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, void __user *arg); #else -static inline int wext_proc_init() +static inline int wext_proc_init(void) { return 0; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f5cfde8c902..1b3033105b4 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -279,6 +279,8 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) return ktime_add(kt, tmp); } + +EXPORT_SYMBOL_GPL(ktime_add_ns); # endif /* !CONFIG_KTIME_SCALAR */ /* -- cgit v1.2.3-70-g09d2 From 8cdfb29c0cd8018f92214c11c631d8926f4cb032 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 9 Mar 2007 10:54:42 -0500 Subject: libata/IDE: remove combined mode quirk Both old-IDE and libata should be able handle all controllers and devices found using normal resource reservation methods. This eliminates the awful, low-performing split-driver configuration where old-IDE drove the PATA portion of a PCI device, in PIO-only mode, and libata drove the SATA portion of the /same/ PCI device, in DMA mode. Typically vendors would ship SATA hard drive / PATA optical configuration, which would lend itself to slow (PIO-only) CD-ROM performance. For Intel users running in combined mode, it is now wholly dependent on your driver choice (potentially link order, if you compile both drivers in) whether old-IDE or libata will drive your hardware. In either case, you will get full performance from both SATA and PATA ports now, without having to pass a kernel command line parameter. Signed-off-by: Jeff Garzik --- arch/i386/defconfig | 1 - arch/parisc/configs/c3000_defconfig | 1 - arch/x86_64/defconfig | 1 - drivers/ata/Kconfig | 5 -- drivers/ata/libata-sff.c | 36 ++---------- drivers/pci/quirks.c | 113 ------------------------------------ include/linux/ioport.h | 1 - kernel/resource.c | 21 ------- 8 files changed, 6 insertions(+), 173 deletions(-) (limited to 'kernel') diff --git a/arch/i386/defconfig b/arch/i386/defconfig index f4efd66e1ee..c96911c37ae 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -692,7 +692,6 @@ CONFIG_SATA_SIL=y CONFIG_SATA_VIA=y # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set -CONFIG_SATA_INTEL_COMBINED=y CONFIG_SATA_ACPI=y # CONFIG_PATA_ALI is not set # CONFIG_PATA_AMD is not set diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig index 782906b644d..eb2f9a3d515 100644 --- a/arch/parisc/configs/c3000_defconfig +++ b/arch/parisc/configs/c3000_defconfig @@ -435,7 +435,6 @@ CONFIG_SCSI_SATA_SIL=m # CONFIG_SCSI_SATA_ULI is not set CONFIG_SCSI_SATA_VIA=m # CONFIG_SCSI_SATA_VITESSE is not set -CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_IPS is not set diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 7a1e251e333..b26378815b9 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -631,7 +631,6 @@ CONFIG_SATA_SIL=y CONFIG_SATA_VIA=y # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set -CONFIG_SATA_INTEL_COMBINED=y CONFIG_SATA_ACPI=y # CONFIG_PATA_ALI is not set # CONFIG_PATA_AMD is not set diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index c679bba4789..365c306c7cf 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -156,11 +156,6 @@ config SATA_INIC162X help This option enables support for Initio 162x Serial ATA. -config SATA_INTEL_COMBINED - bool - depends on IDE=y && !BLK_DEV_IDE_SATA && (SATA_AHCI || ATA_PIIX) - default y - config SATA_ACPI bool depends on ACPI && PCI diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 066689c5e50..0a194660fd2 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -779,40 +779,16 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, /* Deal with combined mode hack. This side of the logic all goes away once the combined mode hack is killed in 2.6.21 */ if (!devm_request_region(dev, ATA_PRIMARY_CMD, 8, "libata")) { - struct resource *conflict, res; - res.start = ATA_PRIMARY_CMD; - res.end = ATA_PRIMARY_CMD + 8 - 1; - conflict = ____request_resource(&ioport_resource, &res); - while (conflict->child) - conflict = ____request_resource(conflict, &res); - if (!strcmp(conflict->name, "libata")) - legacy_mode |= ATA_PORT_PRIMARY; - else { - pcim_pin_device(pdev); - printk(KERN_WARNING "ata: 0x%0X IDE port busy\n" \ - "ata: conflict with %s\n", - ATA_PRIMARY_CMD, - conflict->name); - } + pcim_pin_device(pdev); + printk(KERN_WARNING "ata: 0x%0X IDE port busy\n", + ATA_PRIMARY_CMD); } else legacy_mode |= ATA_PORT_PRIMARY; if (!devm_request_region(dev, ATA_SECONDARY_CMD, 8, "libata")) { - struct resource *conflict, res; - res.start = ATA_SECONDARY_CMD; - res.end = ATA_SECONDARY_CMD + 8 - 1; - conflict = ____request_resource(&ioport_resource, &res); - while (conflict->child) - conflict = ____request_resource(conflict, &res); - if (!strcmp(conflict->name, "libata")) - legacy_mode |= ATA_PORT_SECONDARY; - else { - pcim_pin_device(pdev); - printk(KERN_WARNING "ata: 0x%X IDE port busy\n" \ - "ata: conflict with %s\n", - ATA_SECONDARY_CMD, - conflict->name); - } + pcim_pin_device(pdev); + printk(KERN_WARNING "ata: 0x%X IDE port busy\n", + ATA_SECONDARY_CMD); } else legacy_mode |= ATA_PORT_SECONDARY; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 65d6f23ead4..3411483240c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1303,119 +1303,6 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic ); #endif -enum ide_combined_type { COMBINED = 0, IDE = 1, LIBATA = 2 }; -/* Defaults to combined */ -static enum ide_combined_type combined_mode; - -static int __init combined_setup(char *str) -{ - if (!strncmp(str, "ide", 3)) - combined_mode = IDE; - else if (!strncmp(str, "libata", 6)) - combined_mode = LIBATA; - else /* "combined" or anything else defaults to old behavior */ - combined_mode = COMBINED; - - return 1; -} -__setup("combined_mode=", combined_setup); - -#ifdef CONFIG_SATA_INTEL_COMBINED -static void __devinit quirk_intel_ide_combined(struct pci_dev *pdev) -{ - u8 prog, comb, tmp; - int ich = 0; - - /* - * Narrow down to Intel SATA PCI devices. - */ - switch (pdev->device) { - /* PCI ids taken from drivers/scsi/ata_piix.c */ - case 0x24d1: - case 0x24df: - case 0x25a3: - case 0x25b0: - ich = 5; - break; - case 0x2651: - case 0x2652: - case 0x2653: - case 0x2680: /* ESB2 */ - ich = 6; - break; - case 0x27c0: - case 0x27c4: - ich = 7; - break; - case 0x2828: /* ICH8M */ - ich = 8; - break; - default: - /* we do not handle this PCI device */ - return; - } - - /* - * Read combined mode register. - */ - pci_read_config_byte(pdev, 0x90, &tmp); /* combined mode reg */ - - if (ich == 5) { - tmp &= 0x6; /* interesting bits 2:1, PATA primary/secondary */ - if (tmp == 0x4) /* bits 10x */ - comb = (1 << 0); /* SATA port 0, PATA port 1 */ - else if (tmp == 0x6) /* bits 11x */ - comb = (1 << 2); /* PATA port 0, SATA port 1 */ - else - return; /* not in combined mode */ - } else { - WARN_ON((ich != 6) && (ich != 7) && (ich != 8)); - tmp &= 0x3; /* interesting bits 1:0 */ - if (tmp & (1 << 0)) - comb = (1 << 2); /* PATA port 0, SATA port 1 */ - else if (tmp & (1 << 1)) - comb = (1 << 0); /* SATA port 0, PATA port 1 */ - else - return; /* not in combined mode */ - } - - /* - * Read programming interface register. - * (Tells us if it's legacy or native mode) - */ - pci_read_config_byte(pdev, PCI_CLASS_PROG, &prog); - - /* if SATA port is in native mode, we're ok. */ - if (prog & comb) - return; - - /* Don't reserve any so the IDE driver can get them (but only if - * combined_mode=ide). - */ - if (combined_mode == IDE) - return; - - /* Grab them both for libata if combined_mode=libata. */ - if (combined_mode == LIBATA) { - request_region(0x1f0, 8, "libata"); /* port 0 */ - request_region(0x170, 8, "libata"); /* port 1 */ - return; - } - - /* SATA port is in legacy mode. Reserve port so that - * IDE driver does not attempt to use it. If request_region - * fails, it will be obvious at boot time, so we don't bother - * checking return values. - */ - if (comb == (1 << 0)) - request_region(0x1f0, 8, "libata"); /* port 0 */ - else - request_region(0x170, 8, "libata"); /* port 1 */ -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_intel_ide_combined ); -#endif /* CONFIG_SATA_INTEL_COMBINED */ - - int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6859a3b1408..71ea9231924 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -99,7 +99,6 @@ extern struct resource ioport_resource; extern struct resource iomem_resource; extern int request_resource(struct resource *root, struct resource *new); -extern struct resource * ____request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern int allocate_resource(struct resource *root, struct resource *new, diff --git a/kernel/resource.c b/kernel/resource.c index bdb55a33f96..9bd14fd3e6d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -212,27 +212,6 @@ int request_resource(struct resource *root, struct resource *new) EXPORT_SYMBOL(request_resource); -/** - * ____request_resource - reserve a resource, with resource conflict returned - * @root: root resource descriptor - * @new: resource descriptor desired by caller - * - * Returns: - * On success, NULL is returned. - * On error, a pointer to the conflicting resource is returned. - */ -struct resource *____request_resource(struct resource *root, struct resource *new) -{ - struct resource *conflict; - - write_lock(&resource_lock); - conflict = __request_resource(root, new); - write_unlock(&resource_lock); - return conflict; -} - -EXPORT_SYMBOL(____request_resource); - /** * release_resource - release a previously reserved resource * @old: resource pointer -- cgit v1.2.3-70-g09d2 From 42e380832a6911c8a3173ee0172fbc0e4864d80b Mon Sep 17 00:00:00 2001 From: Robert Peterson Date: Mon, 30 Apr 2007 15:09:48 -0700 Subject: Extend print_symbol capability Today's print_symbol function dumps a kernel symbol with printk. This patch extends the functionality of kallsyms.c so that the symbol lookup function may be used without the printk. This is useful for modules that want to dump symbols elsewhere, for example, to debugfs. I intend to use the new function call in the GFS2 file system (which will be a separate patch). [akpm@linux-foundation.org: build fix] [clameter@sgi.com: sprint_symbol should return length of string like sprintf] Signed-off-by: Robert Peterson Cc: Rusty Russell Cc: Roman Zippel Cc: "Randy.Dunlap" Cc: Sam Ravnborg Acked-by: Paulo Marques Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 13 ++++++++++++- kernel/kallsyms.c | 23 +++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 1cebcbc28b4..3e3b92dabe3 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -7,6 +7,8 @@ #define KSYM_NAME_LEN 127 +#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \ + 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1) #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ @@ -22,7 +24,10 @@ const char *kallsyms_lookup(unsigned long addr, unsigned long *offset, char **modname, char *namebuf); -/* Replace "%s" in format with address, if found */ +/* Look up a kernel symbol and return it in a text buffer. */ +extern int sprint_symbol(char *buffer, unsigned long address); + +/* Look up a kernel symbol and print it to the kernel messages. */ extern void __print_symbol(const char *fmt, unsigned long address); #else /* !CONFIG_KALLSYMS */ @@ -47,6 +52,12 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +static inline int sprint_symbol(char *buffer, unsigned long addr) +{ + *buffer = '\0'; + return 0; +} + /* Stupid that this does nothing, but I didn't create this mess. */ #define __print_symbol(fmt, addr) #endif /*CONFIG_KALLSYMS*/ diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6f294ff4f9e..5a0de840973 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -267,27 +267,33 @@ const char *kallsyms_lookup(unsigned long addr, return NULL; } -/* Replace "%s" in format with address, or returns -errno. */ -void __print_symbol(const char *fmt, unsigned long address) +/* Look up a kernel symbol and return it in a text buffer. */ +int sprint_symbol(char *buffer, unsigned long address) { char *modname; const char *name; unsigned long offset, size; char namebuf[KSYM_NAME_LEN+1]; - char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + - 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1]; name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); - if (!name) - sprintf(buffer, "0x%lx", address); + return sprintf(buffer, "0x%lx", address); else { if (modname) - sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, + return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, size, modname); else - sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); + return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); } +} + +/* Look up a kernel symbol and print it to the kernel messages. */ +void __print_symbol(const char *fmt, unsigned long address) +{ + char buffer[KSYM_SYMBOL_LEN]; + + sprint_symbol(buffer, address); + printk(fmt, buffer); } @@ -452,3 +458,4 @@ static int __init kallsyms_init(void) __initcall(kallsyms_init); EXPORT_SYMBOL(__print_symbol); +EXPORT_SYMBOL_GPL(sprint_symbol); -- cgit v1.2.3-70-g09d2 From fe0c935a6cbf25d72a27c7a345df8a2151de0b74 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Apr 2007 15:09:51 -0700 Subject: rework pm_ops pm_disk_mode, kill misuse This patch series cleans up some misconceptions about pm_ops. Some users of the pm_ops structure attempt to use it to stop the user from entering suspend to disk, this, however, is not possible since the user can always use "shutdown" in /sys/power/disk and then the pm_ops are never invoked. Also, platforms that don't support suspend to disk simply should not allow configuring SOFTWARE_SUSPEND (read the help text on it, it only selects suspend to disk and nothing else, all the other stuff depends on PM). The pm_ops structure is actually intended to provide a way to enter platform-defined sleep states (currently supported states are "standby" and "mem" (suspend to ram)) and additionally (if SOFTWARE_SUSPEND is configured) allows a platform to support a platform specific way to enter low-power mode once everything has been saved to disk. This is currently only used by ACPI (S4). This patch: The pm_ops.pm_disk_mode is used in totally bogus ways since nobody really seems to understand what it actually does. This patch clarifies the pm_disk_mode description. It also removes all the arm and sh users that think they can veto suspend to disk via pm_ops; not so since the user can always do echo shutdown > /sys/power/disk, they need to find a better way involving Kconfig or such. ACPI is the only user left with a non-zero pm_disk_mode. The patch also sets the default mode to shutdown again, but when a new pm_ops is registered its pm_disk_mode is selected as default, that way the default stays for ACPI where it is apparently required. Signed-off-by: Johannes Berg Cc: David Brownell Acked-by: Pavel Machek Cc: Cc: Len Brown Acked-by: Russell King Cc: Greg KH Cc: "Rafael J. Wysocki" Acked-by: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/common/sharpsl_pm.c | 1 - arch/arm/mach-at91/pm.c | 1 - arch/arm/mach-omap1/pm.c | 1 - arch/arm/mach-omap2/pm.c | 1 - arch/arm/mach-pxa/pm.c | 4 --- arch/arm/mach-sa1100/pm.c | 7 ------ arch/arm/plat-s3c24xx/pm.c | 9 ------- arch/sh/boards/hp6xx/pm.c | 7 ------ include/linux/pm.h | 23 +++++++++-------- kernel/power/disk.c | 60 ++++++++++++++++++++++++++++---------------- kernel/power/main.c | 6 ++++- 11 files changed, 56 insertions(+), 64 deletions(-) (limited to 'kernel') diff --git a/arch/arm/common/sharpsl_pm.c b/arch/arm/common/sharpsl_pm.c index a9bc5b52218..4cb895d4ae5 100644 --- a/arch/arm/common/sharpsl_pm.c +++ b/arch/arm/common/sharpsl_pm.c @@ -766,7 +766,6 @@ static void sharpsl_apm_get_power_status(struct apm_power_info *info) } static struct pm_ops sharpsl_pm_ops = { - .pm_disk_mode = PM_DISK_FIRMWARE, .prepare = pxa_pm_prepare, .enter = corgi_pxa_pm_enter, .finish = pxa_pm_finish, diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index b49bfda53d7..ff8db29e989 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -201,7 +201,6 @@ error: static struct pm_ops at91_pm_ops ={ - .pm_disk_mode = 0, .valid = at91_pm_valid_state, .prepare = at91_pm_prepare, .enter = at91_pm_enter, diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 49efe903dac..4248117e5e3 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -698,7 +698,6 @@ static struct irqaction omap_wakeup_irq = { static struct pm_ops omap_pm_ops ={ - .pm_disk_mode = 0, .prepare = omap_pm_prepare, .enter = omap_pm_enter, .finish = omap_pm_finish, diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index d7eee99b7e3..3e9a128feea 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -370,7 +370,6 @@ static int omap2_pm_finish(suspend_state_t state) } static struct pm_ops omap_pm_ops = { - .pm_disk_mode = 0, .prepare = omap2_pm_prepare, .enter = omap2_pm_enter, .finish = omap2_pm_finish, diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c index b4d8276d605..0a99ef43b36 100644 --- a/arch/arm/mach-pxa/pm.c +++ b/arch/arm/mach-pxa/pm.c @@ -223,11 +223,7 @@ int pxa_pm_finish(suspend_state_t state) EXPORT_SYMBOL_GPL(pxa_pm_finish); -/* - * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. - */ static struct pm_ops pxa_pm_ops = { - .pm_disk_mode = PM_DISK_FIRMWARE, .prepare = pxa_pm_prepare, .enter = pxa_pm_enter, .finish = pxa_pm_finish, diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index 786c8534231..b0837113b2a 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -59,9 +59,6 @@ static int sa11x0_pm_enter(suspend_state_t state) unsigned long gpio, sleep_save[SLEEP_SAVE_SIZE]; struct timespec delta, rtc; - if (state != PM_SUSPEND_MEM) - return -EINVAL; - /* preserve current time */ rtc.tv_sec = RCNR; rtc.tv_nsec = 0; @@ -134,11 +131,7 @@ unsigned long sleep_phys_sp(void *sp) return virt_to_phys(sp); } -/* - * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. - */ static struct pm_ops sa11x0_pm_ops = { - .pm_disk_mode = PM_DISK_FIRMWARE, .enter = sa11x0_pm_enter, }; diff --git a/arch/arm/plat-s3c24xx/pm.c b/arch/arm/plat-s3c24xx/pm.c index ecf68d61190..d6af3082af3 100644 --- a/arch/arm/plat-s3c24xx/pm.c +++ b/arch/arm/plat-s3c24xx/pm.c @@ -511,11 +511,6 @@ static int s3c2410_pm_enter(suspend_state_t state) return -EINVAL; } - if (state != PM_SUSPEND_MEM) { - printk(KERN_ERR PFX "error: only PM_SUSPEND_MEM supported\n"); - return -EINVAL; - } - /* check if we have anything to wake-up with... bad things seem * to happen if you suspend with no wakeup (system will often * require a full power-cycle) @@ -633,11 +628,7 @@ static int s3c2410_pm_finish(suspend_state_t state) return 0; } -/* - * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. - */ static struct pm_ops s3c2410_pm_ops = { - .pm_disk_mode = PM_DISK_FIRMWARE, .prepare = s3c2410_pm_prepare, .enter = s3c2410_pm_enter, .finish = s3c2410_pm_finish, diff --git a/arch/sh/boards/hp6xx/pm.c b/arch/sh/boards/hp6xx/pm.c index d1947732fb3..4b2f29a4bde 100644 --- a/arch/sh/boards/hp6xx/pm.c +++ b/arch/sh/boards/hp6xx/pm.c @@ -27,9 +27,6 @@ static int hp6x0_pm_enter(suspend_state_t state) u16 hd64461_stbcr; #endif - if (state != PM_SUSPEND_MEM) - return -EINVAL; - #ifdef CONFIG_HD64461_ENABLER outb(0, HD64461_PCC1CSCIER); @@ -70,11 +67,7 @@ static int hp6x0_pm_enter(suspend_state_t state) return 0; } -/* - * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. - */ static struct pm_ops hp6x0_pm_ops = { - .pm_disk_mode = PM_DISK_FIRMWARE, .enter = hp6x0_pm_enter, }; diff --git a/include/linux/pm.h b/include/linux/pm.h index 9bd86db4d39..dfced9188bd 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -112,6 +112,8 @@ typedef int __bitwise suspend_state_t; typedef int __bitwise suspend_disk_method_t; +/* invalid must be 0 so struct pm_ops initialisers can leave it out */ +#define PM_DISK_INVALID ((__force suspend_disk_method_t) 0) #define PM_DISK_FIRMWARE ((__force suspend_disk_method_t) 1) #define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2) #define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3) @@ -137,17 +139,16 @@ typedef int __bitwise suspend_disk_method_t; * @finish: Called when the system has left the given state and all devices * are resumed. The return value is ignored. * - * @pm_disk_mode: Set to the disk method that the user should be able to - * configure for suspend-to-disk. Since %PM_DISK_SHUTDOWN, - * %PM_DISK_REBOOT, %PM_DISK_TEST and %PM_DISK_TESTPROC - * are always allowed, currently only %PM_DISK_PLATFORM - * makes sense. If the user then choses %PM_DISK_PLATFORM, - * the @prepare call will be called before suspending to disk - * (if present), the @enter call should be present and will - * be called after all state has been saved and the machine - * is ready to be shut down/suspended/..., and the @finish - * callback is called after state has been restored. All - * these calls are called with %PM_SUSPEND_DISK as the state. + * @pm_disk_mode: The generic code always allows one of the shutdown methods + * %PM_DISK_SHUTDOWN, %PM_DISK_REBOOT, %PM_DISK_TEST and + * %PM_DISK_TESTPROC. If this variable is set, the mode it is set + * to is allowed in addition to those modes and is also made default. + * When this mode is sent selected, the @prepare call will be called + * before suspending to disk (if present), the @enter call should be + * present and will be called after all state has been saved and the + * machine is ready to be powered off; the @finish callback is called + * after state has been restored. All these calls are called with + * %PM_SUSPEND_DISK as the state. */ struct pm_ops { int (*valid)(suspend_state_t state); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index aec19b063e3..4de2f69fe09 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -39,7 +39,13 @@ static inline int platform_prepare(void) { int error = 0; - if (pm_disk_mode == PM_DISK_PLATFORM) { + switch (pm_disk_mode) { + case PM_DISK_TEST: + case PM_DISK_TESTPROC: + case PM_DISK_SHUTDOWN: + case PM_DISK_REBOOT: + break; + default: if (pm_ops && pm_ops->prepare) error = pm_ops->prepare(PM_SUSPEND_DISK); } @@ -48,40 +54,48 @@ static inline int platform_prepare(void) /** * power_down - Shut machine down for hibernate. - * @mode: Suspend-to-disk mode * - * Use the platform driver, if configured so, and return gracefully if it - * fails. - * Otherwise, try to power off and reboot. If they fail, halt the machine, - * there ain't no turning back. + * Use the platform driver, if configured so; otherwise try + * to power off or reboot. */ -static void power_down(suspend_disk_method_t mode) +static void power_down(void) { - switch(mode) { - case PM_DISK_PLATFORM: - if (pm_ops && pm_ops->enter) { - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - pm_ops->enter(PM_SUSPEND_DISK); - break; - } + switch (pm_disk_mode) { + case PM_DISK_TEST: + case PM_DISK_TESTPROC: + break; case PM_DISK_SHUTDOWN: kernel_power_off(); break; case PM_DISK_REBOOT: kernel_restart(NULL); break; + default: + if (pm_ops && pm_ops->enter) { + kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); + pm_ops->enter(PM_SUSPEND_DISK); + break; + } } kernel_halt(); - /* Valid image is on the disk, if we continue we risk serious data corruption - after resume. */ + /* + * Valid image is on the disk, if we continue we risk serious data + * corruption after resume. + */ printk(KERN_CRIT "Please power me down manually\n"); while(1); } static inline void platform_finish(void) { - if (pm_disk_mode == PM_DISK_PLATFORM) { + switch (pm_disk_mode) { + case PM_DISK_TEST: + case PM_DISK_TESTPROC: + case PM_DISK_SHUTDOWN: + case PM_DISK_REBOOT: + break; + default: if (pm_ops && pm_ops->finish) pm_ops->finish(PM_SUSPEND_DISK); } @@ -166,7 +180,7 @@ int pm_suspend_disk(void) pr_debug("PM: writing image.\n"); error = swsusp_write(); if (!error) - power_down(pm_disk_mode); + power_down(); else { swsusp_free(); goto Thaw; @@ -338,10 +352,14 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) } } if (mode) { - if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT || - mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) { + switch (mode) { + case PM_DISK_SHUTDOWN: + case PM_DISK_REBOOT: + case PM_DISK_TEST: + case PM_DISK_TESTPROC: pm_disk_mode = mode; - } else { + break; + default: if (pm_ops && pm_ops->enter && (mode == pm_ops->pm_disk_mode)) pm_disk_mode = mode; diff --git a/kernel/power/main.c b/kernel/power/main.c index 3062e940d1f..053c0a7d7f5 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -30,7 +30,7 @@ DEFINE_MUTEX(pm_mutex); struct pm_ops *pm_ops; -suspend_disk_method_t pm_disk_mode = PM_DISK_PLATFORM; +suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; /** * pm_set_ops - Set the global power method table. @@ -41,6 +41,10 @@ void pm_set_ops(struct pm_ops * ops) { mutex_lock(&pm_mutex); pm_ops = ops; + if (ops && ops->pm_disk_mode != PM_DISK_INVALID) { + pm_disk_mode = ops->pm_disk_mode; + } else + pm_disk_mode = PM_DISK_SHUTDOWN; mutex_unlock(&pm_mutex); } -- cgit v1.2.3-70-g09d2 From 11d77d0c01b80e44c7aceb21928508dafce774f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Apr 2007 15:09:53 -0700 Subject: power management: remove firmware disk mode This patch removes the firmware disk suspend mode which is the wrong approach, it is supposed to be used for implementing firmware-based disk suspend but cannot actually be used for that. Signed-off-by: Johannes Berg Acked-by: Pavel Machek Cc: Cc: David Brownell Cc: Len Brown Acked-by: Russell King Cc: Greg KH Cc: "Rafael J. Wysocki" Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/interface.txt | 21 +++++---------------- Documentation/power/states.txt | 13 +++++++------ Documentation/power/swsusp.txt | 14 +++++--------- include/linux/pm.h | 13 ++++++------- kernel/power/disk.c | 27 +++++++++++---------------- 5 files changed, 34 insertions(+), 54 deletions(-) (limited to 'kernel') diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index 74311d7e0f3..8c5b41bf3f3 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -18,17 +18,10 @@ states. /sys/power/disk controls the operating mode of the suspend-to-disk -mechanism. Suspend-to-disk can be handled in several ways. The -greatest distinction is who writes memory to disk - the firmware or -the kernel. If the firmware does it, we assume that it also handles -suspending the system. - -If the kernel does it, then we have three options for putting the system -to sleep - using the platform driver (e.g. ACPI or other PM -registers), powering off the system or rebooting the system (for -testing). The system will support either 'firmware' or 'platform', and -that is known a priori. But, the user may choose 'shutdown' or -'reboot' as alternatives. +mechanism. Suspend-to-disk can be handled in several ways. We have a +few options for putting the system to sleep - using the platform driver +(e.g. ACPI or other pm_ops), powering off the system or rebooting the +system (for testing). Additionally, /sys/power/disk can be used to turn on one of the two testing modes of the suspend-to-disk mechanism: 'testproc' or 'test'. If the @@ -44,16 +37,12 @@ is being slow and which device drivers are misbehaving. Reading from this file will display what the mode is currently set to. Writing to this file will accept one of - 'firmware' - 'platform' + 'platform' (only if the platform supports it) 'shutdown' 'reboot' 'testproc' 'test' -It will only change to 'firmware' or 'platform' if the system supports -it. - /sys/power/image_size controls the size of the image created by the suspend-to-disk mechanism. It can be written a string representing a non-negative integer that will be used as an upper diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt index 0931a330d36..34800cc521b 100644 --- a/Documentation/power/states.txt +++ b/Documentation/power/states.txt @@ -62,17 +62,18 @@ setup via another operating system for it to use. Despite the inconvenience, this method requires minimal work by the kernel, since the firmware will also handle restoring memory contents on resume. -If the kernel is responsible for persistently saving state, a mechanism -called 'swsusp' (Swap Suspend) is used to write memory contents to -free swap space. swsusp has some restrictive requirements, but should -work in most cases. Some, albeit outdated, documentation can be found -in Documentation/power/swsusp.txt. +For suspend-to-disk, a mechanism called swsusp called 'swsusp' (Swap +Suspend) is used to write memory contents to free swap space. +swsusp has some restrictive requirements, but should work in most +cases. Some, albeit outdated, documentation can be found in +Documentation/power/swsusp.txt. Alternatively, userspace can do most +of the actual suspend to disk work, see userland-swsusp.txt. Once memory state is written to disk, the system may either enter a low-power state (like ACPI S4), or it may simply power down. Powering down offers greater savings, and allows this mechanism to work on any system. However, entering a real low-power state allows the user to -trigger wake up events (e.g. pressing a key or opening a laptop lid). +trigger wake up events (e.g. pressing a key or opening a laptop lid). A transition from Suspend-to-Disk to the On state should take about 30 seconds, though it's typically a bit more with the current diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index 0761ff6c57e..c55bd5079b9 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -156,8 +156,7 @@ instead set the PF_NOFREEZE process flag when creating the thread (and be very careful). -Q: What is the difference between "platform", "shutdown" and -"firmware" in /sys/power/disk? +Q: What is the difference between "platform" and "shutdown"? A: @@ -166,11 +165,8 @@ shutdown: save state in linux, then tell bios to powerdown platform: save state in linux, then tell bios to powerdown and blink "suspended led" -firmware: tell bios to save state itself [needs BIOS-specific suspend - partition, and has very little to do with swsusp] - -"platform" is actually right thing to do, but "shutdown" is most -reliable. +"platform" is actually right thing to do where supported, but +"shutdown" is most reliable (except on ACPI systems). Q: I do not understand why you have such strong objections to idea of selective suspend. @@ -388,8 +384,8 @@ while the system is asleep, maintaining the connection, using true sleep modes like "suspend-to-RAM" or "standby". (Don't write "disk" to the /sys/power/state file; write "standby" or "mem".) We've not seen any hardware that can use these modes through software suspend, although in -theory some systems might support "platform" or "firmware" modes that -won't break the USB connections. +theory some systems might support "platform" modes that won't break the +USB connections. Remember that it's always a bad idea to unplug a disk drive containing a mounted filesystem. That's true even when your system is asleep! The diff --git a/include/linux/pm.h b/include/linux/pm.h index dfced9188bd..c2a55f94c29 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -114,13 +114,12 @@ typedef int __bitwise suspend_disk_method_t; /* invalid must be 0 so struct pm_ops initialisers can leave it out */ #define PM_DISK_INVALID ((__force suspend_disk_method_t) 0) -#define PM_DISK_FIRMWARE ((__force suspend_disk_method_t) 1) -#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2) -#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3) -#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4) -#define PM_DISK_TEST ((__force suspend_disk_method_t) 5) -#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 6) -#define PM_DISK_MAX ((__force suspend_disk_method_t) 7) +#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 1) +#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 2) +#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 3) +#define PM_DISK_TEST ((__force suspend_disk_method_t) 4) +#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 5) +#define PM_DISK_MAX ((__force suspend_disk_method_t) 6) /** * struct pm_ops - Callbacks for managing platform dependent suspend states. diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 4de2f69fe09..02e4fb69111 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -122,8 +122,6 @@ static int prepare_processes(void) /** * pm_suspend_disk - The granpappy of hibernation power management. * - * If we're going through the firmware, then get it over with quickly. - * * If not, then call swsusp to do its thing, then figure out how * to power down the system. */ @@ -292,7 +290,6 @@ late_initcall(software_resume); static const char * const pm_disk_modes[] = { - [PM_DISK_FIRMWARE] = "firmware", [PM_DISK_PLATFORM] = "platform", [PM_DISK_SHUTDOWN] = "shutdown", [PM_DISK_REBOOT] = "reboot", @@ -303,27 +300,25 @@ static const char * const pm_disk_modes[] = { /** * disk - Control suspend-to-disk mode * - * Suspend-to-disk can be handled in several ways. The greatest - * distinction is who writes memory to disk - the firmware or the OS. - * If the firmware does it, we assume that it also handles suspending - * the system. - * If the OS does it, then we have three options for putting the system - * to sleep - using the platform driver (e.g. ACPI or other PM registers), - * powering off the system or rebooting the system (for testing). + * Suspend-to-disk can be handled in several ways. We have a few options + * for putting the system to sleep - using the platform driver (e.g. ACPI + * or other pm_ops), powering off the system or rebooting the system + * (for testing) as well as the two test modes. * - * The system will support either 'firmware' or 'platform', and that is - * known a priori (and encoded in pm_ops). But, the user may choose - * 'shutdown' or 'reboot' as alternatives. + * The system can support 'platform', and that is known a priori (and + * encoded in pm_ops). However, the user may choose 'shutdown' or 'reboot' + * as alternatives, as well as the test modes 'test' and 'testproc'. * * show() will display what the mode is currently set to. * store() will accept one of * - * 'firmware' * 'platform' * 'shutdown' * 'reboot' + * 'test' + * 'testproc' * - * It will only change to 'firmware' or 'platform' if the system + * It will only change to 'platform' if the system * supports it (as determined from pm_ops->pm_disk_mode). */ @@ -345,7 +340,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) len = p ? p - buf : n; mutex_lock(&pm_mutex); - for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { + for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { if (!strncmp(buf, pm_disk_modes[i], len)) { mode = i; break; -- cgit v1.2.3-70-g09d2 From e8c9c502690efd24b7055bf608e7a3c34216848b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Apr 2007 15:09:54 -0700 Subject: power management: implement pm_ops.valid for everybody Almost all users of pm_ops only support mem sleep, don't check in .valid and don't reject any others in .prepare so users can be confused if they check /sys/power/state, especially when new states are added (these would then result in s-t-r although they're supposed to be something different). This patch implements a generic pm_valid_only_mem function that is then exported for users and puts it to use in almost all existing pm_ops. Signed-off-by: Johannes Berg Cc: David Brownell Acked-by: Pavel Machek Cc: linux-pm@lists.linux-foundation.org Cc: Len Brown Acked-by: Russell King Cc: Greg KH Cc: "Rafael J. Wysocki" Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/common/sharpsl_pm.c | 1 + arch/arm/mach-omap1/pm.c | 1 + arch/arm/mach-omap2/pm.c | 1 + arch/arm/mach-pnx4008/pm.c | 39 ++++----------------------------------- arch/arm/mach-pxa/pm.c | 1 + arch/arm/mach-sa1100/pm.c | 1 + arch/arm/plat-s3c24xx/pm.c | 19 +------------------ arch/sh/boards/hp6xx/pm.c | 1 + drivers/acpi/sleep/main.c | 13 +++++++++++-- include/linux/pm.h | 4 ++++ kernel/power/main.c | 13 +++++++++++++ 11 files changed, 39 insertions(+), 55 deletions(-) (limited to 'kernel') diff --git a/arch/arm/common/sharpsl_pm.c b/arch/arm/common/sharpsl_pm.c index 4cb895d4ae5..5972df2b9af 100644 --- a/arch/arm/common/sharpsl_pm.c +++ b/arch/arm/common/sharpsl_pm.c @@ -769,6 +769,7 @@ static struct pm_ops sharpsl_pm_ops = { .prepare = pxa_pm_prepare, .enter = corgi_pxa_pm_enter, .finish = pxa_pm_finish, + .valid = pm_valid_only_mem, }; static int __init sharpsl_pm_probe(struct platform_device *pdev) diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 4248117e5e3..0383ab33427 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -701,6 +701,7 @@ static struct pm_ops omap_pm_ops ={ .prepare = omap_pm_prepare, .enter = omap_pm_enter, .finish = omap_pm_finish, + .valid = pm_valid_only_mem, }; static int __init omap_pm_init(void) diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 3e9a128feea..6f4a5436d0c 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -373,6 +373,7 @@ static struct pm_ops omap_pm_ops = { .prepare = omap2_pm_prepare, .enter = omap2_pm_enter, .finish = omap2_pm_finish, + .valid = pm_valid_only_mem, }; int __init omap2_pm_init(void) diff --git a/arch/arm/mach-pnx4008/pm.c b/arch/arm/mach-pnx4008/pm.c index 3649cd3dfc9..2a137f33f75 100644 --- a/arch/arm/mach-pnx4008/pm.c +++ b/arch/arm/mach-pnx4008/pm.c @@ -107,50 +107,19 @@ static int pnx4008_pm_enter(suspend_state_t state) case PM_SUSPEND_MEM: pnx4008_suspend(); break; - case PM_SUSPEND_DISK: - return -ENOTSUPP; - default: - return -EINVAL; } return 0; } -/* - * Called after processes are frozen, but before we shut down devices. - */ -static int pnx4008_pm_prepare(suspend_state_t state) -{ - switch (state) { - case PM_SUSPEND_STANDBY: - case PM_SUSPEND_MEM: - break; - - case PM_SUSPEND_DISK: - return -ENOTSUPP; - break; - - default: - return -EINVAL; - break; - } - return 0; -} - -/* - * Called after devices are re-setup, but before processes are thawed. - */ -static int pnx4008_pm_finish(suspend_state_t state) +static int pnx4008_pm_valid(suspend_state_t state) { - return 0; + return (state == PM_SUSPEND_STANDBY) || + (state == PM_SUSPEND_MEM); } -/* - * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. - */ static struct pm_ops pnx4008_pm_ops = { - .prepare = pnx4008_pm_prepare, .enter = pnx4008_pm_enter, - .finish = pnx4008_pm_finish, + .valid = pnx4008_pm_valid, }; static int __init pnx4008_pm_init(void) diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c index 0a99ef43b36..6bf15ae7384 100644 --- a/arch/arm/mach-pxa/pm.c +++ b/arch/arm/mach-pxa/pm.c @@ -227,6 +227,7 @@ static struct pm_ops pxa_pm_ops = { .prepare = pxa_pm_prepare, .enter = pxa_pm_enter, .finish = pxa_pm_finish, + .valid = pm_valid_only_mem, }; static int __init pxa_pm_init(void) diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index b0837113b2a..d674cf34315 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -133,6 +133,7 @@ unsigned long sleep_phys_sp(void *sp) static struct pm_ops sa11x0_pm_ops = { .enter = sa11x0_pm_enter, + .valid = pm_valid_only_mem, }; static int __init sa11x0_pm_init(void) diff --git a/arch/arm/plat-s3c24xx/pm.c b/arch/arm/plat-s3c24xx/pm.c index d6af3082af3..c6b03f8ab26 100644 --- a/arch/arm/plat-s3c24xx/pm.c +++ b/arch/arm/plat-s3c24xx/pm.c @@ -612,26 +612,9 @@ static int s3c2410_pm_enter(suspend_state_t state) return 0; } -/* - * Called after processes are frozen, but before we shut down devices. - */ -static int s3c2410_pm_prepare(suspend_state_t state) -{ - return 0; -} - -/* - * Called after devices are re-setup, but before processes are thawed. - */ -static int s3c2410_pm_finish(suspend_state_t state) -{ - return 0; -} - static struct pm_ops s3c2410_pm_ops = { - .prepare = s3c2410_pm_prepare, .enter = s3c2410_pm_enter, - .finish = s3c2410_pm_finish, + .valid = pm_valid_only_mem, }; /* s3c2410_pm_init diff --git a/arch/sh/boards/hp6xx/pm.c b/arch/sh/boards/hp6xx/pm.c index 4b2f29a4bde..8143d1b948e 100644 --- a/arch/sh/boards/hp6xx/pm.c +++ b/arch/sh/boards/hp6xx/pm.c @@ -69,6 +69,7 @@ static int hp6x0_pm_enter(suspend_state_t state) static struct pm_ops hp6x0_pm_ops = { .enter = hp6x0_pm_enter, + .valid = pm_valid_only_mem, }; static int __init hp6x0_pm_init(void) diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 37a0930fc0a..f8c63410bcb 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -168,9 +168,18 @@ int acpi_suspend(u32 acpi_state) static int acpi_pm_state_valid(suspend_state_t pm_state) { - u32 acpi_state = acpi_suspend_states[pm_state]; + u32 acpi_state; + + switch (pm_state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + acpi_state = acpi_suspend_states[pm_state]; - return sleep_states[acpi_state]; + return sleep_states[acpi_state]; + default: + return 0; + } } static struct pm_ops acpi_pm_ops = { diff --git a/include/linux/pm.h b/include/linux/pm.h index c2a55f94c29..6035209cf31 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -128,6 +128,9 @@ typedef int __bitwise suspend_disk_method_t; * always valid and never passed to this call. * If not assigned, all suspend states are advertised as valid * in /sys/power/state (but can still be rejected by prepare or enter.) + * Since new states can be added for other platforms, you should + * assign this callback. There is a %pm_valid_only_mem function + * available if you only implemented mem sleep. * * @prepare: Prepare the platform for the given suspend state. Can return a * negative error code if necessary. @@ -165,6 +168,7 @@ extern void pm_set_ops(struct pm_ops *pm_ops); extern struct pm_ops *pm_ops; extern int pm_suspend(suspend_state_t state); +extern int pm_valid_only_mem(suspend_state_t state); /** * arch_suspend_disable_irqs - disable IRQs for suspend diff --git a/kernel/power/main.c b/kernel/power/main.c index 053c0a7d7f5..f94f4e20115 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -48,6 +48,19 @@ void pm_set_ops(struct pm_ops * ops) mutex_unlock(&pm_mutex); } +/** + * pm_valid_only_mem - generic memory-only valid callback + * + * pm_ops drivers that implement mem suspend only and only need + * to check for that in their .valid callback can use this instead + * of rolling their own .valid callback. + */ +int pm_valid_only_mem(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} + + static inline void pm_finish(suspend_state_t state) { if (pm_ops->finish) -- cgit v1.2.3-70-g09d2 From 9684e51cd157607f0727c1550e7df6e31de40808 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Apr 2007 15:09:55 -0700 Subject: power management: force pm_ops.valid callback to be assigned This patch changes the docs and behaviour from "all states valid" to "no states valid" if no .valid callback is assigned. Users of pm_ops that only need mem sleep can assign pm_valid_only_mem without any overhead, others will require more elaborate callbacks. Now that all users of pm_ops have a .valid callback this is a safe thing to do and prevents things from getting messy again as they were before. Signed-off-by: Johannes Berg Acked-by: Pavel Machek Looks-okay-to: Rafael J. Wysocki Cc: Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pm.h | 12 ++++++------ kernel/power/main.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/include/linux/pm.h b/include/linux/pm.h index 6035209cf31..7a516690dcb 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -125,12 +125,12 @@ typedef int __bitwise suspend_disk_method_t; * struct pm_ops - Callbacks for managing platform dependent suspend states. * @valid: Callback to determine whether the given state can be entered. * If %CONFIG_SOFTWARE_SUSPEND is set then %PM_SUSPEND_DISK is - * always valid and never passed to this call. - * If not assigned, all suspend states are advertised as valid - * in /sys/power/state (but can still be rejected by prepare or enter.) - * Since new states can be added for other platforms, you should - * assign this callback. There is a %pm_valid_only_mem function - * available if you only implemented mem sleep. + * always valid and never passed to this call. If not assigned, + * no suspend states are valid. + * Valid states are advertised in /sys/power/state but can still + * be rejected by prepare or enter if the conditions aren't right. + * There is a %pm_valid_only_mem function available that can be assigned + * to this if you only implement mem sleep. * * @prepare: Prepare the platform for the given suspend state. Can return a * negative error code if necessary. diff --git a/kernel/power/main.c b/kernel/power/main.c index f94f4e20115..72419a3b1be 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -198,8 +198,8 @@ static inline int valid_state(suspend_state_t state) /* all other states need lowlevel support and need to be * valid to the lowlevel implementation, no valid callback - * implies that all are valid. */ - if (!pm_ops || (pm_ops->valid && !pm_ops->valid(state))) + * implies that none are valid. */ + if (!pm_ops || !pm_ops->valid || !pm_ops->valid(state)) return 0; return 1; } -- cgit v1.2.3-70-g09d2