From 7c8265f51073bc8632a99de78d5fd19117ed78b7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Jun 2006 14:50:29 -0700 Subject: Suspend infrastructure cleanup and extension Allow devices to participate in the suspend process more intimately, in particular, allow the final phase (with interrupts disabled) to also be open to normal devices, not just system devices. Also, allow classes to participate in device suspend. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/power/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 6d295c77679..0c3ed6ac938 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -57,6 +57,10 @@ static int suspend_prepare(suspend_state_t state) if (!pm_ops || !pm_ops->enter) return -EPERM; + error = device_prepare_suspend(PMSG_SUSPEND); + if (error) + return error; + pm_prepare_console(); disable_nonboot_cpus(); -- cgit v1.2.3-70-g09d2 From f1cc0a894c963923b766eb2d455747495e6e982d Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 14 Aug 2006 23:11:08 -0700 Subject: PM: issue PM_EVENT_PRETHAW This patch is the first of this series that should actually change any behavior ... by issuing the new event, now tha the rest of the kernel is prepared to receive it. This converts the PM core to issue the new PRETHAW message, which the rest of the kernel is now ready to receive. Signed-off-by: David Brownell Cc: "Rafael J. Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/suspend.c | 1 + kernel/power/disk.c | 4 ++-- kernel/power/swsusp.c | 9 ++++++++- kernel/power/user.c | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c index 0bda4a7f204..e86db83746a 100644 --- a/drivers/base/power/suspend.c +++ b/drivers/base/power/suspend.c @@ -34,6 +34,7 @@ static inline char *suspend_verb(u32 event) switch (event) { case PM_EVENT_SUSPEND: return "suspend"; case PM_EVENT_FREEZE: return "freeze"; + case PM_EVENT_PRETHAW: return "prethaw"; default: return "(unknown suspend event)"; } } diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e13e7406784..a3c34fb1432 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -98,7 +98,7 @@ static void unprepare_processes(void) } /** - * pm_suspend_disk - The granpappy of power management. + * pm_suspend_disk - The granpappy of hibernation power management. * * If we're going through the firmware, then get it over with quickly. * @@ -207,7 +207,7 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); - if ((error = device_suspend(PMSG_FREEZE))) { + if ((error = device_suspend(PMSG_PRETHAW))) { printk("Some devices failed to suspend\n"); swsusp_free(); goto Thaw; diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 17f669c8301..62752899b1a 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -248,6 +248,9 @@ int swsusp_suspend(void) restore_processor_state(); Restore_highmem: restore_highmem(); + /* NOTE: device_power_up() is just a resume() for devices + * that suspended with irqs off ... no overall powerup. + */ device_power_up(); Enable_irqs: local_irq_enable(); @@ -257,8 +260,12 @@ Enable_irqs: int swsusp_resume(void) { int error; + local_irq_disable(); - if (device_power_down(PMSG_FREEZE)) + /* NOTE: device_power_down() is just a suspend() with irqs off; + * it has no special "power things down" semantics + */ + if (device_power_down(PMSG_PRETHAW)) printk(KERN_ERR "Some devices failed to power down, very bad\n"); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 3f1539fbe48..5a8d060d790 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -191,7 +191,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, } down(&pm_sem); pm_prepare_console(); - error = device_suspend(PMSG_FREEZE); + error = device_suspend(PMSG_PRETHAW); if (!error) { error = swsusp_resume(); device_resume(); -- cgit v1.2.3-70-g09d2 From 2bca293e56b6a8cd16bb6e70a09b2adac9c723b5 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 30 Aug 2006 13:54:36 -0700 Subject: PM: add kconfig option for deprecated .../power/state files Add a new PM_SYSFS_DEPRECATED config option to control whether or not the /sys/devices/.../power/state files are provided. This will make it easier to get rid of that mechanism when the time comes, and to verify that userspace tools work right without it. Signed-off-by: David Brownell Acked-by: Pavel Machek Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/sysfs.c | 6 ++++++ kernel/power/Kconfig | 11 +++++++++++ 2 files changed, 17 insertions(+) (limited to 'kernel') diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index e55b3c2779e..2d47517dbe3 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -7,6 +7,8 @@ #include "power.h" +#ifdef CONFIG_PM_SYSFS_DEPRECATED + /** * state - Control current power state of device * @@ -66,6 +68,8 @@ static ssize_t state_store(struct device * dev, struct device_attribute *attr, c static DEVICE_ATTR(state, 0644, state_show, state_store); +#endif /* CONFIG_PM_SYSFS_DEPRECATED */ + /* * wakeup - Report/change current wakeup option for device * @@ -139,7 +143,9 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); static struct attribute * power_attrs[] = { +#ifdef CONFIG_PM_SYSFS_DEPRECATED &dev_attr_state.attr, +#endif &dev_attr_wakeup.attr, NULL, }; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 619ecabf7c5..1ed972070d1 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -53,6 +53,17 @@ config PM_TRACE CAUTION: this option will cause your machine's real-time clock to be set to an invalid time after a resume. +config PM_SYSFS_DEPRECATED + bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" + depends on PM && SYSFS + default n + help + The driver model started out with a sysfs file intended to provide + a userspace hook for device power management. This feature has never + worked very well, except for limited testing purposes, and so it will + be removed. It's not clear that a generic mechanism could really + handle the wide variability of device power states; any replacements + are likely to be bus or driver specific. config SOFTWARE_SUSPEND bool "Software Suspend" -- cgit v1.2.3-70-g09d2 From 1d3a82af45428c5e8deaa119cdeb79611ae46371 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 30 Aug 2006 14:09:47 -0700 Subject: PM: no suspend_prepare() phase Remove the new suspend_prepare() phase. It doesn't seem very usable, has never been tested, doesn't address fault cleanup, and would need a sibling resume_complete(); plus there are no real use cases. It could be restored later if those issues get resolved. Signed-off-by: David Brownell Cc: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/suspend.c | 27 --------------------------- drivers/pci/pci-driver.c | 14 -------------- include/linux/device.h | 1 - include/linux/pci.h | 1 - kernel/power/main.c | 4 ---- 5 files changed, 47 deletions(-) (limited to 'kernel') diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c index e86db83746a..6453c25103d 100644 --- a/drivers/base/power/suspend.c +++ b/drivers/base/power/suspend.c @@ -117,33 +117,6 @@ static int suspend_device_late(struct device *dev, pm_message_t state) return error; } -/** - * device_prepare_suspend - save state and prepare to suspend - * - * NOTE! Devices cannot detach at this point - not only do we - * hold the device list semaphores over the whole prepare, but - * the whole point is to do non-invasive preparatory work, not - * the actual suspend. - */ -int device_prepare_suspend(pm_message_t state) -{ - int error = 0; - struct device * dev; - - down(&dpm_sem); - down(&dpm_list_sem); - list_for_each_entry_reverse(dev, &dpm_active, power.entry) { - if (!dev->bus || !dev->bus->suspend_prepare) - continue; - error = dev->bus->suspend_prepare(dev, state); - if (error) - break; - } - up(&dpm_list_sem); - up(&dpm_sem); - return error; -} - /** * device_suspend - Save state and stop all devices in system. * @state: Power state to put each device in. diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 9e7d6ceb380..8948ac9ab68 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -264,19 +264,6 @@ static int pci_device_remove(struct device * dev) return 0; } -static int pci_device_suspend_prepare(struct device * dev, pm_message_t state) -{ - struct pci_dev * pci_dev = to_pci_dev(dev); - struct pci_driver * drv = pci_dev->driver; - int i = 0; - - if (drv && drv->suspend_prepare) { - i = drv->suspend_prepare(pci_dev, state); - suspend_report_result(drv->suspend_prepare, i); - } - return i; -} - static int pci_device_suspend(struct device * dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); @@ -544,7 +531,6 @@ struct bus_type pci_bus_type = { .uevent = pci_uevent, .probe = pci_device_probe, .remove = pci_device_remove, - .suspend_prepare= pci_device_suspend_prepare, .suspend = pci_device_suspend, .suspend_late = pci_device_suspend_late, .resume_early = pci_device_resume_early, diff --git a/include/linux/device.h b/include/linux/device.h index b40be6fca6f..b3646462d6d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -52,7 +52,6 @@ struct bus_type { int (*remove)(struct device * dev); void (*shutdown)(struct device * dev); - int (*suspend_prepare)(struct device * dev, pm_message_t state); int (*suspend)(struct device * dev, pm_message_t state); int (*suspend_late)(struct device * dev, pm_message_t state); int (*resume_early)(struct device * dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 4b2e629467c..9514bbfe96e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -345,7 +345,6 @@ struct pci_driver { const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */ int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ - int (*suspend_prepare) (struct pci_dev *dev, pm_message_t state); int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ int (*suspend_late) (struct pci_dev *dev, pm_message_t state); int (*resume_early) (struct pci_dev *dev); diff --git a/kernel/power/main.c b/kernel/power/main.c index 0c3ed6ac938..6d295c77679 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -57,10 +57,6 @@ static int suspend_prepare(suspend_state_t state) if (!pm_ops || !pm_ops->enter) return -EPERM; - error = device_prepare_suspend(PMSG_SUSPEND); - if (error) - return error; - pm_prepare_console(); disable_nonboot_cpus(); -- cgit v1.2.3-70-g09d2 From 2fbe7b25c8edaf2d10e6c1a4cc9f8afe714c4764 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] i386/x86-64: Remove un/set_nmi_callback and reserve/release_lapic_nmi functions Removes the un/set_nmi_callback and reserve/release_lapic_nmi functions as they are no longer needed. The various subsystems are modified to register with the die_notifier instead. Also includes compile fixes by Andrew Morton. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/crash.c | 20 +++++++- arch/i386/kernel/nmi.c | 85 ++++--------------------------- arch/i386/kernel/traps.c | 23 +-------- arch/i386/oprofile/nmi_int.c | 47 ++++++++++------- arch/i386/oprofile/nmi_timer_int.c | 33 +++++++++--- arch/x86_64/kernel/crash.c | 20 +++++++- arch/x86_64/kernel/nmi.c | 102 +++---------------------------------- include/asm-i386/nmi.h | 21 ++------ include/asm-x86_64/nmi.h | 21 -------- kernel/sysctl.c | 4 +- 10 files changed, 116 insertions(+), 260 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 5b96f038367..736c76d6b31 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include @@ -93,9 +95,18 @@ static void crash_save_self(struct pt_regs *regs) #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(struct pt_regs *regs, int cpu) +static int crash_nmi_callback(struct notifier_block *self, + unsigned long val, void *data) { + struct pt_regs *regs; struct pt_regs fixed_regs; + int cpu; + + if (val != DIE_NMI) + return NOTIFY_OK; + + regs = ((struct die_args *)data)->regs; + cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. * Otherwise, system will completely hang. Crashing cpu can get @@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void) send_IPI_allbutself(NMI_VECTOR); } +static struct notifier_block crash_nmi_nb = { + .notifier_call = crash_nmi_callback, +}; + static void nmi_shootdown_cpus(void) { unsigned long msecs; atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - set_nmi_callback(crash_nmi_callback); + if (register_die_notifier(&crash_nmi_nb)) + return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI */ diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index bd96ea4f294..acd3fdea2a2 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -42,20 +42,6 @@ static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); */ #define NMI_MAX_COUNTER_BITS 66 -/* - * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: - * - it may be reserved by some other driver, or not - * - when not reserved by some other driver, it may be used for - * the NMI watchdog, or not - * - * This is maintained separately from nmi_active because the NMI - * watchdog may also be driven from the I/O APIC timer. - */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); -static unsigned int lapic_nmi_owner; -#define LAPIC_NMI_WATCHDOG (1<<0) -#define LAPIC_NMI_RESERVED (1<<1) - /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled * <0: the lapic NMI watchdog has not been set up, and cannot @@ -325,33 +311,6 @@ static void enable_lapic_nmi_watchdog(void) touch_nmi_watchdog(); } -int reserve_lapic_nmi(void) -{ - unsigned int old_owner; - - spin_lock(&lapic_nmi_owner_lock); - old_owner = lapic_nmi_owner; - lapic_nmi_owner |= LAPIC_NMI_RESERVED; - spin_unlock(&lapic_nmi_owner_lock); - if (old_owner & LAPIC_NMI_RESERVED) - return -EBUSY; - if (old_owner & LAPIC_NMI_WATCHDOG) - disable_lapic_nmi_watchdog(); - return 0; -} - -void release_lapic_nmi(void) -{ - unsigned int new_owner; - - spin_lock(&lapic_nmi_owner_lock); - new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; - lapic_nmi_owner = new_owner; - spin_unlock(&lapic_nmi_owner_lock); - if (new_owner & LAPIC_NMI_WATCHDOG) - enable_lapic_nmi_watchdog(); -} - void disable_timer_nmi_watchdog(void) { BUG_ON(nmi_watchdog != NMI_IO_APIC); @@ -866,6 +825,15 @@ done: return rc; } +int do_nmi_callback(struct pt_regs * regs, int cpu) +{ +#ifdef CONFIG_SYSCTL + if (unknown_nmi_panic) + return unknown_nmi_panic_callback(regs, cpu); +#endif + return 0; +} + #ifdef CONFIG_SYSCTL static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) @@ -873,37 +841,8 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) unsigned char reason = get_nmi_reason(); char buf[64]; - if (!(reason & 0xc0)) { - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); - } - return 0; -} - -/* - * proc handler for /proc/sys/kernel/unknown_nmi_panic - */ -int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - old_state = unknown_nmi_panic; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!unknown_nmi_panic) - return 0; - - if (unknown_nmi_panic) { - if (reserve_lapic_nmi() < 0) { - unknown_nmi_panic = 0; - return -EBUSY; - } else { - set_nmi_callback(unknown_nmi_panic_callback); - } - } else { - release_lapic_nmi(); - unset_nmi_callback(); - } + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(regs, buf); return 0; } @@ -917,7 +856,5 @@ EXPORT_SYMBOL(reserve_perfctr_nmi); EXPORT_SYMBOL(release_perfctr_nmi); EXPORT_SYMBOL(reserve_evntsel_nmi); EXPORT_SYMBOL(release_evntsel_nmi); -EXPORT_SYMBOL(reserve_lapic_nmi); -EXPORT_SYMBOL(release_lapic_nmi); EXPORT_SYMBOL(disable_timer_nmi_watchdog); EXPORT_SYMBOL(enable_timer_nmi_watchdog); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 282f0bd40df..7db664d0b25 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -706,13 +706,6 @@ void die_nmi (struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) -{ - return 0; -} - -static nmi_callback_t nmi_callback = dummy_nmi_callback; - static void default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -732,9 +725,10 @@ static void default_do_nmi(struct pt_regs * regs) */ if (nmi_watchdog_tick(regs, reason)) return; + if (!do_nmi_callback(regs, smp_processor_id())) #endif - if (!rcu_dereference(nmi_callback)(regs, smp_processor_id())) unknown_nmi_error(reason, regs); + return; } if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -765,19 +759,6 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) nmi_exit(); } -void set_nmi_callback(nmi_callback_t callback) -{ - vmalloc_sync_all(); - rcu_assign_pointer(nmi_callback, callback); -} -EXPORT_SYMBOL_GPL(set_nmi_callback); - -void unset_nmi_callback(void) -{ - nmi_callback = dummy_nmi_callback; -} -EXPORT_SYMBOL_GPL(unset_nmi_callback); - #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 8710ca081b1..b3610188bcf 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -17,14 +17,15 @@ #include #include #include +#include #include "op_counter.h" #include "op_x86_model.h" - + static struct op_x86_model_spec const * model; static struct op_msrs cpu_msrs[NR_CPUS]; static unsigned long saved_lvtpc[NR_CPUS]; - + static int nmi_start(void); static void nmi_stop(void); @@ -82,13 +83,24 @@ static void exit_driverfs(void) #define exit_driverfs() do { } while (0) #endif /* CONFIG_PM */ - -static int nmi_callback(struct pt_regs * regs, int cpu) +int profile_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - return model->check_ctrs(regs, &cpu_msrs[cpu]); + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + int cpu = smp_processor_id(); + + switch(val) { + case DIE_NMI: + if (model->check_ctrs(args->regs, &cpu_msrs[cpu])) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } - - + static void nmi_cpu_save_registers(struct op_msrs * msrs) { unsigned int const nr_ctrs = model->num_counters; @@ -174,27 +186,29 @@ static void nmi_cpu_setup(void * dummy) apic_write(APIC_LVTPC, APIC_DM_NMI); } +static struct notifier_block profile_exceptions_nb = { + .notifier_call = profile_exceptions_notify, + .next = NULL, + .priority = 0 +}; static int nmi_setup(void) { + int err=0; + if (!allocate_msrs()) return -ENOMEM; - /* We walk a thin line between law and rape here. - * We need to be careful to install our NMI handler - * without actually triggering any NMIs as this will - * break the core code horrifically. - */ - if (reserve_lapic_nmi() < 0) { + if ((err = register_die_notifier(&profile_exceptions_nb))){ free_msrs(); - return -EBUSY; + return err; } + /* We need to serialize save and setup for HT because the subset * of msrs are distinct for save and setup operations */ on_each_cpu(nmi_save_registers, NULL, 0, 1); on_each_cpu(nmi_cpu_setup, NULL, 0, 1); - set_nmi_callback(nmi_callback); nmi_enabled = 1; return 0; } @@ -250,8 +264,7 @@ static void nmi_shutdown(void) { nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); - unset_nmi_callback(); - release_lapic_nmi(); + unregister_die_notifier(&profile_exceptions_nb); free_msrs(); } diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index a33a73bb502..93ca48f804a 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -17,32 +17,49 @@ #include #include #include +#include -static int nmi_timer_callback(struct pt_regs * regs, int cpu) +int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - oprofile_add_sample(regs, 0); - return 1; + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch(val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, + .next = NULL, + .priority = 0 +}; + static int timer_start(void) { - disable_timer_nmi_watchdog(); - set_nmi_callback(nmi_timer_callback); + if (register_die_notifier(&profile_timer_exceptions_nb)) + return 1; return 0; } static void timer_stop(void) { - enable_timer_nmi_watchdog(); - unset_nmi_callback(); + unregister_die_notifier(&profile_timer_exceptions_nb); synchronize_sched(); /* Allow already-started NMIs to complete. */ } int __init op_nmi_timer_init(struct oprofile_operations * ops) { - if (atomic_read(&nmi_active) <= 0) + if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) return -ENODEV; ops->start = timer_start; diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index d8d5750d610..44c8af65325 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -23,6 +23,7 @@ #include #include #include +#include /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; @@ -95,8 +96,18 @@ static void crash_save_self(struct pt_regs *regs) #ifdef CONFIG_SMP static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(struct pt_regs *regs, int cpu) +static int crash_nmi_callback(struct notifier_block *self, + unsigned long val, void *data) { + struct pt_regs *regs; + int cpu; + + if (val != DIE_NMI) + return NOTIFY_OK; + + regs = ((struct die_args *)data)->regs; + cpu = raw_smp_processor_id(); + /* * Don't do anything if this handler is invoked on crashing cpu. * Otherwise, system will completely hang. Crashing cpu can get @@ -127,12 +138,17 @@ static void smp_send_nmi_allbutself(void) * cpu hotplug shouldn't matter. */ +static struct notifier_block crash_nmi_nb = { + .notifier_call = crash_nmi_callback, +}; + static void nmi_shootdown_cpus(void) { unsigned long msecs; atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - set_nmi_callback(crash_nmi_callback); + if (register_die_notifier(&crash_nmi_nb)) + return; /* return what? */ /* * Ensure the new callback function is set before sending diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index f6b881b23a7..9d175dcf3a2 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -41,20 +41,6 @@ static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); */ #define NMI_MAX_COUNTER_BITS 66 -/* - * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: - * - it may be reserved by some other driver, or not - * - when not reserved by some other driver, it may be used for - * the NMI watchdog, or not - * - * This is maintained separately from nmi_active because the NMI - * watchdog may also be driven from the I/O APIC timer. - */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); -static unsigned int lapic_nmi_owner; -#define LAPIC_NMI_WATCHDOG (1<<0) -#define LAPIC_NMI_RESERVED (1<<1) - /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled * <0: the lapic NMI watchdog has not been set up, and cannot @@ -321,33 +307,6 @@ static void enable_lapic_nmi_watchdog(void) touch_nmi_watchdog(); } -int reserve_lapic_nmi(void) -{ - unsigned int old_owner; - - spin_lock(&lapic_nmi_owner_lock); - old_owner = lapic_nmi_owner; - lapic_nmi_owner |= LAPIC_NMI_RESERVED; - spin_unlock(&lapic_nmi_owner_lock); - if (old_owner & LAPIC_NMI_RESERVED) - return -EBUSY; - if (old_owner & LAPIC_NMI_WATCHDOG) - disable_lapic_nmi_watchdog(); - return 0; -} - -void release_lapic_nmi(void) -{ - unsigned int new_owner; - - spin_lock(&lapic_nmi_owner_lock); - new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; - lapic_nmi_owner = new_owner; - spin_unlock(&lapic_nmi_owner_lock); - if (new_owner & LAPIC_NMI_WATCHDOG) - enable_lapic_nmi_watchdog(); -} - void disable_timer_nmi_watchdog(void) { BUG_ON(nmi_watchdog != NMI_IO_APIC); @@ -762,13 +721,6 @@ done: return rc; } -static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu) -{ - return 0; -} - -static nmi_callback_t nmi_callback = dummy_nmi_callback; - asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) { nmi_enter(); @@ -779,21 +731,12 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) int do_nmi_callback(struct pt_regs * regs, int cpu) { - return rcu_dereference(nmi_callback)(regs, cpu); -} - -void set_nmi_callback(nmi_callback_t callback) -{ - vmalloc_sync_all(); - rcu_assign_pointer(nmi_callback, callback); -} -EXPORT_SYMBOL_GPL(set_nmi_callback); - -void unset_nmi_callback(void) -{ - nmi_callback = dummy_nmi_callback; +#ifdef CONFIG_SYSCTL + if (unknown_nmi_panic) + return unknown_nmi_panic_callback(regs, cpu); +#endif + return 0; } -EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_SYSCTL @@ -802,37 +745,8 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) unsigned char reason = get_nmi_reason(); char buf[64]; - if (!(reason & 0xc0)) { - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf,regs); - } - return 0; -} - -/* - * proc handler for /proc/sys/kernel/unknown_nmi_panic - */ -int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - old_state = unknown_nmi_panic; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!unknown_nmi_panic) - return 0; - - if (unknown_nmi_panic) { - if (reserve_lapic_nmi() < 0) { - unknown_nmi_panic = 0; - return -EBUSY; - } else { - set_nmi_callback(unknown_nmi_panic_callback); - } - } else { - release_lapic_nmi(); - unset_nmi_callback(); - } + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(buf,regs); return 0; } @@ -846,8 +760,6 @@ EXPORT_SYMBOL(reserve_perfctr_nmi); EXPORT_SYMBOL(release_perfctr_nmi); EXPORT_SYMBOL(reserve_evntsel_nmi); EXPORT_SYMBOL(release_evntsel_nmi); -EXPORT_SYMBOL(reserve_lapic_nmi); -EXPORT_SYMBOL(release_lapic_nmi); EXPORT_SYMBOL(disable_timer_nmi_watchdog); EXPORT_SYMBOL(enable_timer_nmi_watchdog); EXPORT_SYMBOL(touch_nmi_watchdog); diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index da0e0b4e913..34d6bf063b6 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -6,24 +6,13 @@ #include -struct pt_regs; - -typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu); - -/** - * set_nmi_callback - * - * Set a handler for an NMI. Only one handler may be - * set. Return 1 if the NMI was handled. - */ -void set_nmi_callback(nmi_callback_t callback); - /** - * unset_nmi_callback + * do_nmi_callback * - * Remove the handler previously set. + * Check to see if a callback exists and execute it. Return 1 + * if the handler exists and was handled successfully. */ -void unset_nmi_callback(void); +int do_nmi_callback(struct pt_regs *regs, int cpu); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); @@ -33,8 +22,6 @@ extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); extern void setup_apic_nmi_watchdog (void *); -extern int reserve_lapic_nmi(void); -extern void release_lapic_nmi(void); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index 8f02a2a416e..8818c39d34e 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -7,25 +7,6 @@ #include #include -struct pt_regs; - -typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu); - -/** - * set_nmi_callback - * - * Set a handler for an NMI. Only one handler may be - * set. Return 1 if the NMI was handled. - */ -void set_nmi_callback(nmi_callback_t callback); - -/** - * unset_nmi_callback - * - * Remove the handler previously set. - */ -void unset_nmi_callback(void); - /** * do_nmi_callback * @@ -72,8 +53,6 @@ extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); extern void setup_apic_nmi_watchdog (void *); -extern int reserve_lapic_nmi(void); -extern void release_lapic_nmi(void); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc3713..83f16836162 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -76,8 +76,6 @@ extern int compat_log; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; -extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); #endif /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -628,7 +626,7 @@ static ctl_table kern_table[] = { .data = &unknown_nmi_panic, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = &proc_unknown_nmi_panic, + .proc_handler = &proc_dointvec, }, #endif #if defined(CONFIG_X86) -- cgit v1.2.3-70-g09d2 From 407984f1af259b31957c7c05075a454a751bb801 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Add abilty to enable/disable nmi watchdog with sysctl Adds a new /proc/sys/kernel/nmi call that will enable/disable the nmi watchdog. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86_64/kernel/nmi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-i386/nmi.h | 1 + include/asm-x86_64/nmi.h | 1 + include/linux/sysctl.h | 1 + kernel/sysctl.c | 11 ++++++++++ 6 files changed, 114 insertions(+) (limited to 'kernel') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acd3fdea2a2..28065d0b71a 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -846,6 +846,58 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) return 0; } +/* + * proc handler for /proc/sys/kernel/nmi_watchdog + */ +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!nmi_watchdog_enabled) + return 0; + + if (atomic_read(&nmi_active) < 0) { + printk(KERN_WARNING "NMI watchdog is permanently disabled\n"); + return -EINVAL; + } + + if (nmi_watchdog == NMI_DEFAULT) { + if (nmi_known_cpu() > 0) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); + } else if (nmi_watchdog == NMI_IO_APIC) { + /* FIXME + * for some reason these functions don't work + */ + printk("Can not enable/disable NMI on IO APIC\n"); + return -EINVAL; +#if 0 + if (nmi_watchdog_enabled) + enable_timer_nmi_watchdog(); + else + disable_timer_nmi_watchdog(); +#endif + } else { + printk( KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; + } + return 0; +} + #endif EXPORT_SYMBOL(nmi_active); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 9d175dcf3a2..3a17411a9a1 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -750,6 +750,54 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) return 0; } +/* + * proc handler for /proc/sys/kernel/nmi + */ +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!nmi_watchdog_enabled) + return 0; + + if (atomic_read(&nmi_active) < 0) { + printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); + return -EINVAL; + } + + /* if nmi_watchdog is not set yet, then set it */ + nmi_watchdog_default(); + + if (nmi_watchdog == NMI_LOCAL_APIC) + { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); + } else if (nmi_watchdog == NMI_IO_APIC) { + /* FIXME + * for some reason these functions don't work + */ + printk("Can not enable/disable NMI on IO APIC\n"); + return -EIO; +#if 0 + if (nmi_watchdog_enabled) + enable_timer_nmi_watchdog(); + else + disable_timer_nmi_watchdog(); +#endif + } else { + printk(KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; + } + return 0; +} + #endif EXPORT_SYMBOL(nmi_active); diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 34d6bf063b6..13b5d8311bf 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -14,6 +14,7 @@ */ int do_nmi_callback(struct pt_regs *regs, int cpu); +extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index 8818c39d34e..2c23b0df87d 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -43,6 +43,7 @@ extern void die_nmi(char *str, struct pt_regs *regs); extern int panic_on_timeout; extern int unknown_nmi_panic; +extern int nmi_watchdog_enabled; extern int check_nmi_watchdog(void); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 736ed917a4f..ecb79ba52ae 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -150,6 +150,7 @@ enum KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ KERN_COMPAT_LOG=73, /* int: print compat layer messages */ KERN_MAX_LOCK_DEPTH=74, + KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 83f16836162..040de6bd74d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -76,6 +76,9 @@ extern int compat_log; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; +int nmi_watchdog_enabled; +extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, + void __user *, size_t *, loff_t *); #endif /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -628,6 +631,14 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = KERN_NMI_WATCHDOG, + .procname = "nmi_watchdog", + .data = &nmi_watchdog_enabled, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_nmi_enabled, + }, #endif #if defined(CONFIG_X86) { -- cgit v1.2.3-70-g09d2 From 8da5adda91df3d2fcc5300e68da491694c9af019 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Allow users to force a panic on NMI To quote Alan Cox: The default Linux behaviour on an NMI of either memory or unknown is to continue operation. For many environments such as scientific computing it is preferable that the box is taken out and the error dealt with than an uncorrected parity/ECC error get propogated. A small number of systems do generate NMI's for bizarre random reasons such as power management so the default is unchanged. In other respects the new proc/sys entry works like the existing panic controls already in that directory. This is separate to the edac support - EDAC allows supported chipsets to handle ECC errors well, this change allows unsupported cases to at least panic rather than cause problems further down the line. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 6 ++++++ arch/x86_64/kernel/traps.c | 6 ++++++ include/linux/kernel.h | 1 + include/linux/sysctl.h | 1 + kernel/panic.c | 1 + kernel/sysctl.c | 8 ++++++++ 6 files changed, 23 insertions(+) (limited to 'kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7db664d0b25..2f6cb827648 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -635,6 +635,8 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs) "to continue\n"); printk(KERN_EMERG "You probably have a hardware problem with your RAM " "chips\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); /* Clear and disable the memory parity error line. */ clear_mem_error(reason); @@ -670,6 +672,10 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) reason, smp_processor_id()); printk("Dazed and confused, but trying to continue\n"); printk("Do you have a strange power saving mode enabled?\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + } static DEFINE_SPINLOCK(nmi_print_lock); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 42bc070fdf1..b18829db2a6 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -732,6 +732,8 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); printk("You probably have a hardware problem with your RAM chips\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); /* Clear and disable the memory parity error line. */ reason = (reason & 0xf) | 4; @@ -757,6 +759,10 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); printk("Dazed and confused, but trying to continue\n"); printk("Do you have a strange power saving mode enabled?\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + } /* Runs on IST stack. This code must keep interrupts off all the time. diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b2ae4fdce8..1ff9609300b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -186,6 +186,7 @@ extern void bust_spinlocks(int yes); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; extern int panic_on_oops; +extern int panic_on_unrecovered_nmi; extern int tainted; extern const char *print_tainted(void); extern void add_taint(unsigned); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index ecb79ba52ae..432778446ad 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -151,6 +151,7 @@ enum KERN_COMPAT_LOG=73, /* int: print compat layer messages */ KERN_MAX_LOCK_DEPTH=74, KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ + KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ }; diff --git a/kernel/panic.c b/kernel/panic.c index 8010b9b17ac..d2db3e2209e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -21,6 +21,7 @@ #include int panic_on_oops; +int panic_on_unrecovered_nmi; int tainted; static int pause_on_oops; static int pause_on_oops_flag; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 040de6bd74d..220e2056412 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -641,6 +641,14 @@ static ctl_table kern_table[] = { }, #endif #if defined(CONFIG_X86) + { + .ctl_name = KERN_PANIC_ON_NMI, + .procname = "panic_on_unrecovered_nmi", + .data = &panic_on_unrecovered_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_BOOTLOADER_TYPE, .procname = "bootloader_type", -- cgit v1.2.3-70-g09d2 From 3cfc348bf90ffaa777c188652aa297f04eb94de8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] x86: Add portable getcpu call For NUMA optimization and some other algorithms it is useful to have a fast to get the current CPU and node numbers in user space. x86-64 added a fast way to do this in a vsyscall. This adds a generic syscall for other architectures to make it a generic portable facility. I expect some of them will also implement it as a faster vsyscall. The cache is an optimization for the x86-64 vsyscall optimization. Since what the syscall returns is an approximation anyways and user space often wants very fast results it can be cached for some time. The norma methods to get this information in user space are relatively slow The vsyscall is in a better position to manage the cache because it has direct access to a fast time stamp (jiffies). For the generic syscall optimization it doesn't help much, but enforce a valid argument to keep programs portable I only added an i386 syscall entry for now. Other architectures can follow as needed. AK: Also added some cleanups from Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/syscall_table.S | 1 + arch/x86_64/ia32/ia32entry.S | 1 + include/asm-i386/unistd.h | 3 ++- include/linux/syscalls.h | 2 ++ kernel/sys.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index dd63d477539..7e639f78b0b 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -317,3 +317,4 @@ ENTRY(sys_call_table) .long sys_tee /* 315 */ .long sys_vmsplice .long sys_move_pages + .long sys_getcpu diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 30ed0f6f4a2..32fd32bea07 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -713,4 +713,5 @@ ia32_sys_call_table: .quad sys_tee .quad compat_sys_vmsplice .quad compat_sys_move_pages + .quad sys_getcpu ia32_syscall_end: diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index fc1c8ddae14..565d0897b20 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -323,10 +323,11 @@ #define __NR_tee 315 #define __NR_vmsplice 316 #define __NR_move_pages 317 +#define __NR_getcpu 318 #ifdef __KERNEL__ -#define NR_syscalls 318 +#define NR_syscalls 319 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 008f04c5673..3f0f716225e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -53,6 +53,7 @@ struct mq_attr; struct compat_stat; struct compat_timeval; struct robust_list_head; +struct getcpu_cache; #include #include @@ -596,5 +597,6 @@ asmlinkage long sys_get_robust_list(int pid, size_t __user *len_ptr); asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); +asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache); #endif diff --git a/kernel/sys.c b/kernel/sys.c index e236f98f7ec..3f894775488 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -2062,3 +2063,33 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, } return error; } + +asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, + struct getcpu_cache __user *cache) +{ + int err = 0; + int cpu = raw_smp_processor_id(); + if (cpup) + err |= put_user(cpu, cpup); + if (nodep) + err |= put_user(cpu_to_node(cpu), nodep); + if (cache) { + /* + * The cache is not needed for this implementation, + * but make sure user programs pass something + * valid. vsyscall implementations can instead make + * good use of the cache. Only use t0 and t1 because + * these are available in both 32bit and 64bit ABI (no + * need for a compat_getcpu). 32bit has enough + * padding + */ + unsigned long t0, t1; + get_user(t0, &cache->t0); + get_user(t1, &cache->t1); + t0++; + t1++; + put_user(t0, &cache->t0); + put_user(t1, &cache->t1); + } + return err ? -EFAULT : 0; +} -- cgit v1.2.3-70-g09d2 From 0cb91a2293648507886563ccb91979cfc94d6a4b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] i386: Account spinlocks to the caller during profiling for !FP kernels This ports the algorithm from x86-64 (with improvements) to i386. Previously this only worked for frame pointer enabled kernels. But spinlocks have a very simple stack frame that can be manually analyzed. Do this. Signed-off-by: Andi Kleen --- arch/i386/kernel/time.c | 23 +++++++++++++++++++---- include/asm-i386/ptrace.h | 4 ---- kernel/spinlock.c | 5 +++++ 3 files changed, 24 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index edd00f6cee3..5af802ef00b 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long nowtime) int timer_ack; -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - if (!user_mode_vm(regs) && in_lock_functions(pc)) +#ifdef CONFIG_SMP + if (!user_mode_vm(regs) && in_lock_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->ebp + 4); - +#else + unsigned long *sp; + if ((regs->xcs & 3) == 0) + sp = (unsigned long *)®s->esp; + else + sp = (unsigned long *)regs->esp; + /* Return address is either directly at stack pointer + or above a saved eflags. Eflags has bits 22-31 zero, + kernel addresses don't. */ + if (sp[0] >> 22) + return sp[0]; + if (sp[1] >> 22) + return sp[1]; +#endif + } +#endif return pc; } EXPORT_SYMBOL(profile_pc); -#endif /* * This is the same as the above, except we _also_ save the current diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index f324c53b6f9..30a442ec205 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -80,11 +80,7 @@ static inline int user_mode_vm(struct pt_regs *regs) return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0; } #define instruction_pointer(regs) ((regs)->eip) -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) extern unsigned long profile_pc(struct pt_regs *regs); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif #endif /* __KERNEL__ */ #endif diff --git a/kernel/spinlock.c b/kernel/spinlock.c index fb524b009ee..9644a41e0be 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -7,6 +7,11 @@ * * This file contains the spinlock/rwlock implementations for the * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) + * + * Note that some architectures have special knowledge about the + * stack frames of these functions in their profile_pc. If you + * change anything significant here that could change the stack + * frame contact the architecture maintainers. */ #include -- cgit v1.2.3-70-g09d2 From 5a1b3999d6cb7ab87f1f3b1700bc91839fd6fa29 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] x86: Some preparationary cleanup for stack trace - Remove unused all_contexts parameter No caller used it - Move skip argument into the structure (needed for followon patches) Cc: mingo@elte.hu Signed-off-by: Andi Kleen --- arch/i386/kernel/stacktrace.c | 11 +++-------- arch/s390/kernel/stacktrace.c | 17 ++++++++--------- arch/x86_64/kernel/stacktrace.c | 14 +++++--------- include/linux/stacktrace.h | 7 ++++--- kernel/lockdep.c | 5 ++++- 5 files changed, 24 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c index e62a037ab39..ae3c32a87ad 100644 --- a/arch/i386/kernel/stacktrace.c +++ b/arch/i386/kernel/stacktrace.c @@ -61,12 +61,8 @@ save_context_stack(struct stack_trace *trace, unsigned int skip, /* * Save stack-backtrace addresses into a stack_trace buffer. - * If all_contexts is set, all contexts (hardirq, softirq and process) - * are saved. If not set then only the current context is saved. */ -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) +void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { unsigned long ebp; unsigned long *stack = &ebp; @@ -85,10 +81,9 @@ void save_stack_trace(struct stack_trace *trace, struct thread_info *context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - ebp = save_context_stack(trace, skip, context, stack, ebp); + ebp = save_context_stack(trace, trace->skip, context, stack, ebp); stack = (unsigned long *)context->previous_esp; - if (!all_contexts || !stack || - trace->nr_entries >= trace->max_entries) + if (!stack || trace->nr_entries >= trace->max_entries) break; trace->entries[trace->nr_entries++] = ULONG_MAX; if (trace->nr_entries >= trace->max_entries) diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index de83f38288d..d9428a0fc8f 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -59,9 +59,7 @@ static inline unsigned long save_context_stack(struct stack_trace *trace, } } -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) +void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { register unsigned long sp asm ("15"); unsigned long orig_sp; @@ -69,22 +67,23 @@ void save_stack_trace(struct stack_trace *trace, sp &= PSW_ADDR_INSN; orig_sp = sp; - sp = save_context_stack(trace, &skip, sp, + sp = save_context_stack(trace, &trace->skip, sp, S390_lowcore.panic_stack - PAGE_SIZE, S390_lowcore.panic_stack); - if ((sp != orig_sp) && !all_contexts) + if ((sp != orig_sp) && !trace->all_contexts) return; - sp = save_context_stack(trace, &skip, sp, + sp = save_context_stack(trace, &trace->skip, sp, S390_lowcore.async_stack - ASYNC_SIZE, S390_lowcore.async_stack); - if ((sp != orig_sp) && !all_contexts) + if ((sp != orig_sp) && !trace->all_contexts) return; if (task) - save_context_stack(trace, &skip, sp, + save_context_stack(trace, &trace->skip, sp, (unsigned long) task_stack_page(task), (unsigned long) task_stack_page(task) + THREAD_SIZE); else - save_context_stack(trace, &skip, sp, S390_lowcore.thread_info, + save_context_stack(trace, &trace->skip, sp, + S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); return; } diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c index 32cf55eb9af..1c022af8fe1 100644 --- a/arch/x86_64/kernel/stacktrace.c +++ b/arch/x86_64/kernel/stacktrace.c @@ -109,9 +109,10 @@ out_restore: * Save stack-backtrace addresses into a stack_trace buffer: */ static inline unsigned long -save_context_stack(struct stack_trace *trace, unsigned int skip, +save_context_stack(struct stack_trace *trace, unsigned long stack, unsigned long stack_end) { + int skip = trace->skip; unsigned long addr; #ifdef CONFIG_FRAME_POINTER @@ -159,12 +160,8 @@ save_context_stack(struct stack_trace *trace, unsigned int skip, /* * Save stack-backtrace addresses into a stack_trace buffer. - * If all_contexts is set, all contexts (hardirq, softirq and process) - * are saved. If not set then only the current context is saved. */ -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) +void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { unsigned long stack = (unsigned long)&stack; int i, nr_stacks = 0, stacks_done[MAX_STACKS]; @@ -207,9 +204,8 @@ void save_stack_trace(struct stack_trace *trace, return; stacks_done[nr_stacks] = stack_end; - stack = save_context_stack(trace, skip, stack, stack_end); - if (!all_contexts || !stack || - trace->nr_entries >= trace->max_entries) + stack = save_context_stack(trace, stack, stack_end); + if (!stack || trace->nr_entries >= trace->max_entries) return; trace->entries[trace->nr_entries++] = ULONG_MAX; if (trace->nr_entries >= trace->max_entries) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 9cc81e57222..50e2b01e517 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -5,15 +5,16 @@ struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; + int skip; /* input argument: How many entries to skip */ + int all_contexts; /* input argument: if true do than one stack */ }; extern void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip); + struct task_struct *task); extern void print_stack_trace(struct stack_trace *trace, int spaces); #else -# define save_stack_trace(trace, task, all, skip) do { } while (0) +# define save_stack_trace(trace, task) do { } while (0) # define print_stack_trace(trace) do { } while (0) #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9bad1788451..900b4cb1a02 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -224,7 +224,10 @@ static int save_trace(struct stack_trace *trace) trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; trace->entries = stack_trace + nr_stack_trace_entries; - save_stack_trace(trace, NULL, 0, 3); + trace->skip = 3; + trace->all_contexts = 0; + + save_stack_trace(trace, NULL); trace->max_entries = trace->nr_entries; -- cgit v1.2.3-70-g09d2 From 3fa7c794fe4dc127f7fac3fad4d13628e68f89ce Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] Avoid recursion in lockdep when stack tracer takes locks The new dwarf2 unwinder needs to take locks to do backtraces inside modules. This patch makes sure lockdep which calls stacktrace is not reentered. Thanks to Ingo for suggesting this simpler approach. Cc: mingo@elte.hu Signed-off-by: Andi Kleen --- kernel/lockdep.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 900b4cb1a02..c088e5542e8 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -227,7 +227,11 @@ static int save_trace(struct stack_trace *trace) trace->skip = 3; trace->all_contexts = 0; + /* Make sure to not recurse in case the the unwinder needs to tak +e locks. */ + lockdep_off(); save_stack_trace(trace, NULL); + lockdep_on(); trace->max_entries = trace->nr_entries; -- cgit v1.2.3-70-g09d2 From 0a4254058037eb172758961d0a5b94f4320a1425 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 26 Sep 2006 10:52:38 +0200 Subject: [PATCH] Add the canary field to the PDA area and the task struct This patch adds the per thread cookie field to the task struct and the PDA. Also it makes sure that the PDA value gets the new cookie value at context switch, and that a new task gets a new cookie at task creation time. Signed-off-by: Arjan van Ven Signed-off-by: Ingo Molnar Signed-off-by: Andi Kleen CC: Andi Kleen --- arch/x86_64/kernel/process.c | 8 ++++++++ include/asm-x86_64/pda.h | 7 ++++++- include/linux/sched.h | 5 +++++ kernel/fork.c | 5 +++++ 4 files changed, 24 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 9e9a70e50c7..fba8dfeda67 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -625,6 +625,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unlazy_fpu(prev_p); write_pda(kernelstack, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); +#ifdef CONFIG_CC_STACKPROTECTOR + write_pda(stack_canary, next_p->stack_canary); + /* + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement + */ + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); +#endif /* * Now maybe reload the debug registers and handle I/O bitmaps diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h index 6794ffaae43..e7773e0af86 100644 --- a/include/asm-x86_64/pda.h +++ b/include/asm-x86_64/pda.h @@ -16,7 +16,12 @@ struct x8664_pda { unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts with -1 */ int cpunumber; /* 36 Logical CPU number */ - char *irqstackptr; /* 40 top of irqstack */ +#ifdef CONFIG_CC_STACKPROTECTOR + unsigned long stack_canary; /* 40 stack canary value */ + /* gcc-ABI: this canary MUST be at + offset 40!!! */ +#endif + char *irqstackptr; int nodenumber; /* number of current node */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 807556c5bcd..9d4aa7f95bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -819,6 +819,11 @@ struct task_struct { unsigned did_exec:1; pid_t pid; pid_t tgid; + +#ifdef CONFIG_CC_STACKPROTECTOR + /* Canary value for the -fstack-protector gcc feature */ + unsigned long stack_canary; +#endif /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with diff --git a/kernel/fork.c b/kernel/fork.c index f9b014e3e70..a0dad84567c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -175,6 +176,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->thread_info = ti; setup_thread_stack(tsk, orig); +#ifdef CONFIG_CC_STACKPROTECTOR + tsk->stack_canary = get_random_int(); +#endif + /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); -- cgit v1.2.3-70-g09d2 From 3162f751d04086a9d006342de63ac8f44fe0f72a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] Add the __stack_chk_fail() function GCC emits a call to a __stack_chk_fail() function when the stack canary is not matching the expected value. Since this is a bad security issue; lets panic the kernel rather than limping along; the kernel really can't be trusted anymore when this happens. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Andi Kleen CC: Andi Kleen --- kernel/panic.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index d2db3e2209e..6ceb664fb52 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -271,3 +271,15 @@ void oops_exit(void) { do_oops_enter_exit(); } + +#ifdef CONFIG_CC_STACKPROTECTOR +/* + * Called when gcc's -fstack-protector feature is used, and + * gcc detects corruption of the on-stack canary value + */ +void __stack_chk_fail(void) +{ + panic("stack-protector: Kernel stack is corrupted"); +} +EXPORT_SYMBOL(__stack_chk_fail); +#endif -- cgit v1.2.3-70-g09d2 From adf1423698f00d00b267f7dca8231340ce7d65ef Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] i386/x86-64: Work around gcc bug with noreturn functions in unwinder Current gcc generates calls not jumps to noreturn functions. When that happens the return address can point to the next function, which confuses the unwinder. This patch works around it by marking asynchronous exception frames in contrast normal call frames in the unwind information. Then teach the unwinder to decode this. For normal call frames the unwinder now subtracts one from the address which avoids this problem. The standard libgcc unwinder uses the same trick. It doesn't include adjustment of the printed address (i.e. for the original example, it'd still be kernel_math_error+0 that gets displayed, but the unwinder wouldn't get confused anymore. This only works with binutils 2.6.17+ and some versions of H.J.Lu's 2.6.16 unfortunately because earlier binutils don't support .cfi_signal_frame [AK: added automatic detection of the new binutils and wrote description] Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/Makefile | 4 ++++ arch/i386/kernel/entry.S | 4 ++++ arch/x86_64/Makefile | 4 ++++ arch/x86_64/ia32/ia32entry.S | 4 ++++ arch/x86_64/kernel/entry.S | 4 ++++ include/asm-i386/dwarf2.h | 7 +++++++ include/asm-i386/unwind.h | 5 +++++ include/asm-x86_64/dwarf2.h | 6 ++++++ include/asm-x86_64/unwind.h | 5 +++++ kernel/unwind.c | 35 ++++++++++++++++++++++++++++------- scripts/Kbuild.include | 4 ++-- 11 files changed, 73 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 508cdbeb3a0..7cc0b189b82 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -50,6 +50,10 @@ CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-op cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +# is .cfi_signal_frame supported too? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + CFLAGS += $(cflags-y) # Default subarch .c files diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 284f2e908ad..5a63d6fdb70 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -183,18 +183,21 @@ VM_MASK = 0x00020000 #define RING0_INT_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 3*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_EC_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 4*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_PTREGS_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, OLDESP-EBX;\ /*CFI_OFFSET cs, CS-OLDESP;*/\ CFI_OFFSET eip, EIP-OLDESP;\ @@ -275,6 +278,7 @@ need_resched: # sysenter call handler stub ENTRY(sysenter_entry) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA esp, 0 CFI_REGISTER esp, ebp movl TSS_sysenter_esp0(%esp),%esp diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 2b8d07c7010..1c0f18d4f88 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -58,6 +58,10 @@ cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +# is .cfi_signal_frame supported too? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) + cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector ) cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector-all ) diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 32fd32bea07..b4aa875e175 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -71,6 +71,7 @@ */ ENTRY(ia32_sysenter_target) CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp swapgs @@ -186,6 +187,7 @@ ENDPROC(ia32_sysenter_target) */ ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,PDA_STACKOFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ @@ -293,6 +295,7 @@ ia32_badarg: ENTRY(ia32_syscall) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,SS+8-RIP /*CFI_REL_OFFSET ss,SS-RIP*/ CFI_REL_OFFSET rsp,RSP-RIP @@ -370,6 +373,7 @@ ENTRY(ia32_ptregs_common) popq %r11 CFI_ENDPROC CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,SS+8-ARGOFFSET CFI_REL_OFFSET rax,RAX-ARGOFFSET CFI_REL_OFFSET rcx,RCX-ARGOFFSET diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index ea32688386f..4cbc65290ae 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -123,6 +123,7 @@ .macro CFI_DEFAULT_STACK start=1 .if \start CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,SS+8 .else CFI_DEF_CFA_OFFSET SS+8 @@ -207,6 +208,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,PDA_STACKOFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ @@ -324,6 +326,7 @@ END(system_call) */ ENTRY(int_ret_from_sys_call) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,SS+8-ARGOFFSET /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ CFI_REL_OFFSET rsp,RSP-ARGOFFSET @@ -484,6 +487,7 @@ END(stub_rt_sigreturn) */ .macro _frame ref CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,SS+8-\ref /*CFI_REL_OFFSET ss,SS-\ref*/ CFI_REL_OFFSET rsp,RSP-\ref diff --git a/include/asm-i386/dwarf2.h b/include/asm-i386/dwarf2.h index 5d1a8db5a9b..6d66398a307 100644 --- a/include/asm-i386/dwarf2.h +++ b/include/asm-i386/dwarf2.h @@ -28,6 +28,12 @@ #define CFI_RESTORE_STATE .cfi_restore_state #define CFI_UNDEFINED .cfi_undefined +#ifdef CONFIG_AS_CFI_SIGNAL_FRAME +#define CFI_SIGNAL_FRAME .cfi_signal_frame +#else +#define CFI_SIGNAL_FRAME +#endif + #else /* Due to the structure of pre-exisiting code, don't use assembler line @@ -48,6 +54,7 @@ #define CFI_REMEMBER_STATE ignore #define CFI_RESTORE_STATE ignore #define CFI_UNDEFINED ignore +#define CFI_SIGNAL_FRAME ignore #endif diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index f0ac399bae3..5031d693b89 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -18,6 +18,7 @@ struct unwind_frame_info { struct pt_regs regs; struct task_struct *task; + unsigned call_frame:1; }; #define UNW_PC(frame) (frame)->regs.eip @@ -44,6 +45,10 @@ struct unwind_frame_info PTREGS_INFO(edi), \ PTREGS_INFO(eip) +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 4)) + static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, /*const*/ struct pt_regs *regs) { diff --git a/include/asm-x86_64/dwarf2.h b/include/asm-x86_64/dwarf2.h index 2b9368365fa..eedc08526b0 100644 --- a/include/asm-x86_64/dwarf2.h +++ b/include/asm-x86_64/dwarf2.h @@ -28,6 +28,11 @@ #define CFI_REMEMBER_STATE .cfi_remember_state #define CFI_RESTORE_STATE .cfi_restore_state #define CFI_UNDEFINED .cfi_undefined +#ifdef CONFIG_AS_CFI_SIGNAL_FRAME +#define CFI_SIGNAL_FRAME .cfi_signal_frame +#else +#define CFI_SIGNAL_FRAME +#endif #else @@ -45,6 +50,7 @@ #define CFI_REMEMBER_STATE # #define CFI_RESTORE_STATE # #define CFI_UNDEFINED # +#define CFI_SIGNAL_FRAME # #endif diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h index 1f6e9bfb569..b8fa5cb7ff8 100644 --- a/include/asm-x86_64/unwind.h +++ b/include/asm-x86_64/unwind.h @@ -18,6 +18,7 @@ struct unwind_frame_info { struct pt_regs regs; struct task_struct *task; + unsigned call_frame:1; }; #define UNW_PC(frame) (frame)->regs.rip @@ -57,6 +58,10 @@ struct unwind_frame_info PTREGS_INFO(r15), \ PTREGS_INFO(rip) +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 8)) + static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, /*const*/ struct pt_regs *regs) { diff --git a/kernel/unwind.c b/kernel/unwind.c index f69c804c8e6..3430475fcd8 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -603,6 +603,7 @@ int unwind(struct unwind_frame_info *frame) #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) const u32 *fde = NULL, *cie = NULL; const u8 *ptr = NULL, *end = NULL; + unsigned long pc = UNW_PC(frame) - frame->call_frame; unsigned long startLoc = 0, endLoc = 0, cfa; unsigned i; signed ptrType = -1; @@ -612,7 +613,7 @@ int unwind(struct unwind_frame_info *frame) if (UNW_PC(frame) == 0) return -EINVAL; - if ((table = find_table(UNW_PC(frame))) != NULL + if ((table = find_table(pc)) != NULL && !(table->size & (sizeof(*fde) - 1))) { unsigned long tableSize = table->size; @@ -647,7 +648,7 @@ int unwind(struct unwind_frame_info *frame) ptrType & DW_EH_PE_indirect ? ptrType : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed)); - if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc) + if (pc >= startLoc && pc < endLoc) break; cie = NULL; } @@ -657,16 +658,28 @@ int unwind(struct unwind_frame_info *frame) state.cieEnd = ptr; /* keep here temporarily */ ptr = (const u8 *)(cie + 2); end = (const u8 *)(cie + 1) + *cie; + frame->call_frame = 1; if ((state.version = *ptr) != 1) cie = NULL; /* unsupported version */ else if (*++ptr) { /* check if augmentation size is first (and thus present) */ if (*ptr == 'z') { - /* check for ignorable (or already handled) - * nul-terminated augmentation string */ - while (++ptr < end && *ptr) - if (strchr("LPR", *ptr) == NULL) + while (++ptr < end && *ptr) { + switch(*ptr) { + /* check for ignorable (or already handled) + * nul-terminated augmentation string */ + case 'L': + case 'P': + case 'R': + continue; + case 'S': + frame->call_frame = 0; + continue; + default: break; + } + break; + } } if (ptr >= end || *ptr) cie = NULL; @@ -755,7 +768,7 @@ int unwind(struct unwind_frame_info *frame) state.org = startLoc; memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); /* process instructions */ - if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state) + if (!processCFI(ptr, end, pc, ptrType, &state) || state.loc > endLoc || state.regs[retAddrReg].where == Nowhere || state.cfa.reg >= ARRAY_SIZE(reg_info) @@ -763,6 +776,11 @@ int unwind(struct unwind_frame_info *frame) || state.cfa.offs % sizeof(unsigned long)) return -EIO; /* update frame */ +#ifndef CONFIG_AS_CFI_SIGNAL_FRAME + if(frame->call_frame + && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign)) + frame->call_frame = 0; +#endif cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; startLoc = min((unsigned long)UNW_SP(frame), cfa); endLoc = max((unsigned long)UNW_SP(frame), cfa); @@ -866,6 +884,7 @@ int unwind_init_frame_info(struct unwind_frame_info *info, /*const*/ struct pt_regs *regs) { info->task = tsk; + info->call_frame = 0; arch_unw_init_frame_info(info, regs); return 0; @@ -879,6 +898,7 @@ int unwind_init_blocked(struct unwind_frame_info *info, struct task_struct *tsk) { info->task = tsk; + info->call_frame = 0; arch_unw_init_blocked(info); return 0; @@ -894,6 +914,7 @@ int unwind_init_running(struct unwind_frame_info *info, void *arg) { info->task = current; + info->call_frame = 0; return arch_unwind_init_running(info, callback, arg); } diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 7adef12a0c2..4f5ff19b992 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -66,8 +66,8 @@ as-option = $(shell if $(CC) $(CFLAGS) $(1) -Wa,-Z -c -o /dev/null \ # as-instr # Usage: cflags-y += $(call as-instr, instr, option1, option2) -as-instr = $(shell if echo -e "$(1)" | $(AS) -Z -o astest$$$$.out \ - 2>&1 >/dev/null ; then echo "$(2)"; else echo "$(3)"; fi; \ +as-instr = $(shell if echo -e "$(1)" | $(AS) >/dev/null 2>&1 -W -Z -o astest$$$$.out ; \ + then echo "$(2)"; else echo "$(3)"; fi; \ rm -f astest$$$$.out) # cc-option -- cgit v1.2.3-70-g09d2 From d33b6fba2c4350651f3f61ff2ab858a2f116e9a4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 30 Jun 2006 02:31:24 -0700 Subject: Resources: insert identical resources above existing resources If you have two resources which aree exactly the same size, insert_resource() currently inserts the new one below the existing one. This is wrong because there's no way to insert a resource of the same size above an existing one. I took this opportunity to rewrite the initial loop to be a for-loop instead of a goto-loop and fix the documentation. Signed-off-by: Matthew Wilcox Cc: Ivan Kokshaysky Cc: Dominik Brodowski Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- kernel/resource.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 46286434af8..9db38a1a752 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -344,12 +344,11 @@ EXPORT_SYMBOL(allocate_resource); * * Returns 0 on success, -EBUSY if the resource can't be inserted. * - * This function is equivalent of request_resource when no conflict + * This function is equivalent to request_resource when no conflict * happens. If a conflict happens, and the conflicting resources * entirely fit within the range of the new resource, then the new - * resource is inserted and the conflicting resources become childs of - * the new resource. Otherwise the new resource becomes the child of - * the conflicting resource + * resource is inserted and the conflicting resources become children of + * the new resource. */ int insert_resource(struct resource *parent, struct resource *new) { @@ -357,20 +356,21 @@ int insert_resource(struct resource *parent, struct resource *new) struct resource *first, *next; write_lock(&resource_lock); - begin: - result = 0; - first = __request_resource(parent, new); - if (!first) - goto out; - result = -EBUSY; - if (first == parent) - goto out; + for (;; parent = first) { + result = 0; + first = __request_resource(parent, new); + if (!first) + goto out; - /* Resource fully contained by the clashing resource? Recurse into it */ - if (first->start <= new->start && first->end >= new->end) { - parent = first; - goto begin; + result = -EBUSY; + if (first == parent) + goto out; + + if ((first->start > new->start) || (first->end < new->end)) + break; + if ((first->start == new->start) && (first->end == new->end)) + break; } for (next = first; ; next = next->sibling) { -- cgit v1.2.3-70-g09d2 From 0ec76a110f432e98277e464b82ace8dd66571689 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Sep 2006 01:50:15 -0700 Subject: [PATCH] NOMMU: Check that access_process_vm() has a valid target Check that access_process_vm() is accessing a valid mapping in the target process. This limits ptrace() accesses and accesses through /proc//maps to only those regions actually mapped by a program. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 54 ------------------------------------------------------ mm/memory.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/nommu.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 54 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 9a111f70145..8aad0331d82 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -241,60 +241,6 @@ int ptrace_detach(struct task_struct *child, unsigned int data) return 0; } -/* - * Access another process' address space. - * Source/target buffer must be kernel space, - * Do not walk the page table directly, use get_user_pages - */ - -int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) -{ - struct mm_struct *mm; - struct vm_area_struct *vma; - struct page *page; - void *old_buf = buf; - - mm = get_task_mm(tsk); - if (!mm) - return 0; - - down_read(&mm->mmap_sem); - /* ignore errors, just check how much was sucessfully transfered */ - while (len) { - int bytes, ret, offset; - void *maddr; - - ret = get_user_pages(tsk, mm, addr, 1, - write, 1, &page, &vma); - if (ret <= 0) - break; - - bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = PAGE_SIZE-offset; - - maddr = kmap(page); - if (write) { - copy_to_user_page(vma, page, addr, - maddr + offset, buf, bytes); - set_page_dirty_lock(page); - } else { - copy_from_user_page(vma, page, addr, - buf, maddr + offset, bytes); - } - kunmap(page); - page_cache_release(page); - len -= bytes; - buf += bytes; - addr += bytes; - } - up_read(&mm->mmap_sem); - mmput(mm); - - return buf - old_buf; -} - int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) { int copied = 0; diff --git a/mm/memory.c b/mm/memory.c index f2ef1dcfff7..601159a46ab 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2604,3 +2604,56 @@ int in_gate_area_no_task(unsigned long addr) } #endif /* __HAVE_ARCH_GATE_AREA */ + +/* + * Access another process' address space. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + struct page *page; + void *old_buf = buf; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + /* ignore errors, just check how much was sucessfully transfered */ + while (len) { + int bytes, ret, offset; + void *maddr; + + ret = get_user_pages(tsk, mm, addr, 1, + write, 1, &page, &vma); + if (ret <= 0) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + page_cache_release(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + up_read(&mm->mmap_sem); + mmput(mm); + + return buf - old_buf; +} diff --git a/mm/nommu.c b/mm/nommu.c index d99dea31e44..d08acdae003 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1206,3 +1206,50 @@ struct page *filemap_nopage(struct vm_area_struct *area, BUG(); return NULL; } + +/* + * Access another process' address space. + * - source/target buffer must be kernel space + */ +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +{ + struct vm_list_struct *vml; + struct vm_area_struct *vma; + struct mm_struct *mm; + + if (addr + len < addr) + return 0; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + + /* the access must start within one of the target process's mappings */ + for (vml = mm->context.vmlist; vml; vml = vml->next) + if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end) + break; + + if (vml) { + vma = vml->vma; + + /* don't overrun this mapping */ + if (addr + len >= vma->vm_end) + len = vma->vm_end - addr; + + /* only read or write mappings where it is permitted */ + if (write && vma->vm_flags & VM_WRITE) + len -= copy_to_user((void *) addr, buf, len); + else if (!write && vma->vm_flags & VM_READ) + len -= copy_from_user(buf, (void *) addr, len); + else + len = 0; + } else { + len = 0; + } + + up_read(&mm->mmap_sem); + mmput(mm); + return len; +} -- cgit v1.2.3-70-g09d2 From f269fdd1829acc5e53bf57b145003e5733133f2b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Sep 2006 01:50:23 -0700 Subject: [PATCH] NOMMU: move the fallback arch_vma_name() to a sensible place Move the fallback arch_vma_name() to a sensible place (kernel/signal.c). Currently it's in fs/proc/task_mmu.c, a file that is dependent on both CONFIG_PROC_FS and CONFIG_MMU being enabled, but it's used from kernel/signal.c from where it is called unconditionally. [akpm@osdl.org: build fix] Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 5 ----- include/linux/mm.h | 2 +- kernel/signal.c | 5 +++++ 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 0a163a4f776..6b769afac55 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -122,11 +122,6 @@ struct mem_size_stats unsigned long private_dirty; }; -__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) -{ - return NULL; -} - static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { struct proc_maps_private *priv = m->private; diff --git a/include/linux/mm.h b/include/linux/mm.h index 22165cb1890..7b703b6d435 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1131,7 +1131,7 @@ void drop_slab(void); extern int randomize_va_space; #endif -const char *arch_vma_name(struct vm_area_struct *vma); +__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/kernel/signal.c b/kernel/signal.c index bfdb5686fa3..05853a7337e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2577,6 +2577,11 @@ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) } #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ +__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + void __init signals_init(void) { sigqueue_cachep = -- cgit v1.2.3-70-g09d2 From 8e18e2941c53416aa219708e7dcad21fb4bd6794 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Sep 2006 01:50:46 -0700 Subject: [PATCH] inode_diet: Replace inode.u.generic_ip with inode.i_private The following patches reduce the size of the VFS inode structure by 28 bytes on a UP x86. (It would be more on an x86_64 system). This is a 10% reduction in the inode size on a UP kernel that is configured in a production mode (i.e., with no spinlock or other debugging functions enabled; if you want to save memory taken up by in-core inodes, the first thing you should do is disable the debugging options; they are responsible for a huge amount of bloat in the VFS inode structure). This patch: The filesystem or device-specific pointer in the inode is inside a union, which is pretty pointless given that all 30+ users of this field have been using the void pointer. Get rid of the union and rename it to i_private, with a comment to explain who is allowed to use the void pointer. This is just a cleanup, but it allows us to reuse the union 'u' for something something where the union will actually be used. [judith@osdl.org: powerpc build fix] Signed-off-by: "Theodore Ts'o" Signed-off-by: Judith Lebzelter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 2 +- arch/s390/hypfs/inode.c | 6 ++--- arch/s390/kernel/debug.c | 2 +- block/blktrace.c | 2 +- drivers/i2c/chips/tps65010.c | 2 +- drivers/infiniband/hw/ipath/ipath_fs.c | 12 ++++----- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 4 +-- drivers/misc/ibmasm/ibmasmfs.c | 16 ++++++------ drivers/net/irda/vlsi_ir.h | 2 +- drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c | 2 +- drivers/oprofile/oprofilefs.c | 10 ++++---- drivers/pci/hotplug/cpqphp_sysfs.c | 2 +- drivers/usb/core/devio.c | 2 +- drivers/usb/core/inode.c | 6 ++--- drivers/usb/gadget/inode.c | 6 ++--- drivers/usb/host/isp116x-hcd.c | 2 +- drivers/usb/host/uhci-debug.c | 2 +- drivers/usb/mon/mon_stat.c | 2 +- drivers/usb/mon/mon_text.c | 4 +-- fs/autofs/inode.c | 2 +- fs/autofs/symlink.c | 2 +- fs/binfmt_misc.c | 8 +++--- fs/debugfs/file.c | 4 +-- fs/debugfs/inode.c | 4 +-- fs/devpts/inode.c | 4 +-- fs/freevxfs/vxfs.h | 2 +- fs/freevxfs/vxfs_inode.c | 4 +-- fs/fuse/control.c | 6 ++--- fs/inode.c | 2 +- fs/jffs/inode-v23.c | 34 +++++++++++++------------- fs/libfs.c | 2 +- fs/ocfs2/dlmglue.c | 2 +- include/linux/fs.h | 4 +-- kernel/relay.c | 2 +- security/inode.c | 8 +++--- 36 files changed, 88 insertions(+), 90 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7b4572805db..b837f12a84a 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -120,7 +120,7 @@ spufs_new_file(struct super_block *sb, struct dentry *dentry, ret = 0; inode->i_op = &spufs_file_iops; inode->i_fop = fops; - inode->u.generic_ip = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); d_add(dentry, inode); out: return ret; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 641e6511cf0..446e17d162a 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -85,7 +85,7 @@ static int hcall_inst_seq_open(struct inode *inode, struct file *file) rc = seq_open(file, &hcall_inst_seq_ops); seq = file->private_data; - seq->private = file->f_dentry->d_inode->u.generic_ip; + seq->private = file->f_dentry->d_inode->i_private; return rc; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index bdade5f2e32..8735024d235 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -104,13 +104,13 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) static void hypfs_drop_inode(struct inode *inode) { - kfree(inode->u.generic_ip); + kfree(inode->i_private); generic_delete_inode(inode); } static int hypfs_open(struct inode *inode, struct file *filp) { - char *data = filp->f_dentry->d_inode->u.generic_ip; + char *data = filp->f_dentry->d_inode->i_private; struct hypfs_sb_info *fs_info; if (filp->f_mode & FMODE_WRITE) { @@ -352,7 +352,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, parent->d_inode->i_nlink++; } else BUG(); - inode->u.generic_ip = data; + inode->i_private = data; d_instantiate(dentry, inode); dget(dentry); return dentry; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 7ba20922a53..43f3d0c7e13 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -603,7 +603,7 @@ debug_open(struct inode *inode, struct file *file) debug_info_t *debug_info, *debug_info_snapshot; down(&debug_lock); - debug_info = (struct debug_info*)file->f_dentry->d_inode->u.generic_ip; + debug_info = file->f_dentry->d_inode->i_private; /* find debug view */ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) diff --git a/block/blktrace.c b/block/blktrace.c index 265f7a83061..2b4ef2b89b8 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -217,7 +217,7 @@ static int blk_trace_remove(request_queue_t *q) static int blk_dropped_open(struct inode *inode, struct file *filp) { - filp->private_data = inode->u.generic_ip; + filp->private_data = inode->i_private; return 0; } diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 0be6fd6a267..6a757821717 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -305,7 +305,7 @@ static int dbg_show(struct seq_file *s, void *_) static int dbg_tps_open(struct inode *inode, struct file *file) { - return single_open(file, dbg_show, inode->u.generic_ip); + return single_open(file, dbg_show, inode->i_private); } static struct file_operations debug_fops = { diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a5eb30a06a5..055cdd089b2 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -64,7 +64,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.generic_ip = data; + inode->i_private = data; if ((mode & S_IFMT) == S_IFDIR) { inode->i_op = &simple_dir_inode_operations; inode->i_nlink++; @@ -119,7 +119,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, u16 i; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; for (i = 0; i < NUM_COUNTERS; i++) counters[i] = ipath_snap_cntr(dd, i); @@ -139,7 +139,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, struct ipath_devdata *dd; u64 guid; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; guid = be64_to_cpu(dd->ipath_guid); @@ -178,7 +178,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, u32 tmp, tmp2; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; /* so we only initialize non-zero fields. */ memset(portinfo, 0, sizeof portinfo); @@ -325,7 +325,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -381,7 +381,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 5dde380e8db..f1cb83688b3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -141,7 +141,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } @@ -247,7 +247,7 @@ static int ipoib_path_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 4a35caff5d0..0e909b61722 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -175,7 +175,7 @@ static struct dentry *ibmasmfs_create_file (struct super_block *sb, } inode->i_fop = fops; - inode->u.generic_ip = data; + inode->i_private = data; d_add(dentry, inode); return dentry; @@ -244,7 +244,7 @@ static int command_file_open(struct inode *inode, struct file *file) { struct ibmasmfs_command_data *command_data; - if (!inode->u.generic_ip) + if (!inode->i_private) return -ENODEV; command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL); @@ -252,7 +252,7 @@ static int command_file_open(struct inode *inode, struct file *file) return -ENOMEM; command_data->command = NULL; - command_data->sp = inode->u.generic_ip; + command_data->sp = inode->i_private; file->private_data = command_data; return 0; } @@ -351,10 +351,10 @@ static int event_file_open(struct inode *inode, struct file *file) struct ibmasmfs_event_data *event_data; struct service_processor *sp; - if (!inode->u.generic_ip) + if (!inode->i_private) return -ENODEV; - sp = inode->u.generic_ip; + sp = inode->i_private; event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL); if (!event_data) @@ -439,14 +439,14 @@ static int r_heartbeat_file_open(struct inode *inode, struct file *file) { struct ibmasmfs_heartbeat_data *rhbeat; - if (!inode->u.generic_ip) + if (!inode->i_private) return -ENODEV; rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL); if (!rhbeat) return -ENOMEM; - rhbeat->sp = (struct service_processor *)inode->u.generic_ip; + rhbeat->sp = inode->i_private; rhbeat->active = 0; ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); file->private_data = rhbeat; @@ -508,7 +508,7 @@ static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, static int remote_settings_file_open(struct inode *inode, struct file *file) { - file->private_data = inode->u.generic_ip; + file->private_data = inode->i_private; return 0; } diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h index a82a4ba8de4..c37f0bc4c7f 100644 --- a/drivers/net/irda/vlsi_ir.h +++ b/drivers/net/irda/vlsi_ir.h @@ -58,7 +58,7 @@ typedef void irqreturn_t; /* PDE() introduced in 2.5.4 */ #ifdef CONFIG_PROC_FS -#define PDE(inode) ((inode)->u.generic_ip) +#define PDE(inode) ((inode)->i_private) #endif /* irda crc16 calculation exported in 2.5.42 */ diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c b/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c index 923275ea078..b9df06a06ea 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c +++ b/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c @@ -54,7 +54,7 @@ static ssize_t write_file_dummy(struct file *file, const char __user *buf, static int open_file_generic(struct inode *inode, struct file *file) { - file->private_data = inode->u.generic_ip; + file->private_data = inode->i_private; return 0; } diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 71c2da277d6..deb37354785 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -110,8 +110,8 @@ static ssize_t ulong_write_file(struct file * file, char const __user * buf, siz static int default_open(struct inode * inode, struct file * filp) { - if (inode->u.generic_ip) - filp->private_data = inode->u.generic_ip; + if (inode->i_private) + filp->private_data = inode->i_private; return 0; } @@ -158,7 +158,7 @@ int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root, if (!d) return -EFAULT; - d->d_inode->u.generic_ip = val; + d->d_inode->i_private = val; return 0; } @@ -171,7 +171,7 @@ int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root, if (!d) return -EFAULT; - d->d_inode->u.generic_ip = val; + d->d_inode->i_private = val; return 0; } @@ -197,7 +197,7 @@ int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root, if (!d) return -EFAULT; - d->d_inode->u.generic_ip = val; + d->d_inode->i_private = val; return 0; } diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index 8b3da007e85..5bab666cd67 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -140,7 +140,7 @@ struct ctrl_dbg { static int open(struct inode *inode, struct file *file) { - struct controller *ctrl = inode->u.generic_ip; + struct controller *ctrl = inode->i_private; struct ctrl_dbg *dbg; int retval = -ENOMEM; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 218621b9958..32e03000420 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -555,7 +555,7 @@ static int usbdev_open(struct inode *inode, struct file *file) if (imajor(inode) == USB_DEVICE_MAJOR) dev = usbdev_lookup_minor(iminor(inode)); if (!dev) - dev = inode->u.generic_ip; + dev = inode->i_private; if (!dev) { kfree(ps); goto out; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 3182c2224ba..482f253085e 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -402,8 +402,8 @@ static loff_t default_file_lseek (struct file *file, loff_t offset, int orig) static int default_open (struct inode *inode, struct file *file) { - if (inode->u.generic_ip) - file->private_data = inode->u.generic_ip; + if (inode->i_private) + file->private_data = inode->i_private; return 0; } @@ -509,7 +509,7 @@ static struct dentry *fs_create_file (const char *name, mode_t mode, } else { if (dentry->d_inode) { if (data) - dentry->d_inode->u.generic_ip = data; + dentry->d_inode->i_private = data; if (fops) dentry->d_inode->i_fop = fops; dentry->d_inode->i_uid = uid; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 3bdc5e3ba23..ffaa8c1afad 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -844,7 +844,7 @@ fail1: static int ep_open (struct inode *inode, struct file *fd) { - struct ep_data *data = inode->u.generic_ip; + struct ep_data *data = inode->i_private; int value = -EBUSY; if (down_interruptible (&data->lock) != 0) @@ -1909,7 +1909,7 @@ fail: static int dev_open (struct inode *inode, struct file *fd) { - struct dev_data *dev = inode->u.generic_ip; + struct dev_data *dev = inode->i_private; int value = -EBUSY; if (dev->state == STATE_DEV_DISABLED) { @@ -1970,7 +1970,7 @@ gadgetfs_make_inode (struct super_block *sb, inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.generic_ip = data; + inode->i_private = data; inode->i_fop = fops; } return inode; diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 5147ed4a666..8c6b38a0b5b 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1204,7 +1204,7 @@ static int isp116x_show_dbg(struct seq_file *s, void *unused) static int isp116x_open_seq(struct inode *inode, struct file *file) { - return single_open(file, isp116x_show_dbg, inode->u.generic_ip); + return single_open(file, isp116x_show_dbg, inode->i_private); } static struct file_operations isp116x_debug_fops = { diff --git a/drivers/usb/host/uhci-debug.c b/drivers/usb/host/uhci-debug.c index dc286a48caf..d1372cb27f3 100644 --- a/drivers/usb/host/uhci-debug.c +++ b/drivers/usb/host/uhci-debug.c @@ -428,7 +428,7 @@ struct uhci_debug { static int uhci_debug_open(struct inode *inode, struct file *file) { - struct uhci_hcd *uhci = inode->u.generic_ip; + struct uhci_hcd *uhci = inode->i_private; struct uhci_debug *up; int ret = -ENOMEM; unsigned long flags; diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c index 1fe01d994a7..86ad2b381c4 100644 --- a/drivers/usb/mon/mon_stat.c +++ b/drivers/usb/mon/mon_stat.c @@ -28,7 +28,7 @@ static int mon_stat_open(struct inode *inode, struct file *file) if ((sp = kmalloc(sizeof(struct snap), GFP_KERNEL)) == NULL) return -ENOMEM; - mbus = inode->u.generic_ip; + mbus = inode->i_private; sp->slen = snprintf(sp->str, STAT_BUF_SIZE, "nreaders %d events %u text_lost %u\n", diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index f961a770cee..2fd39b4fa16 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -238,7 +238,7 @@ static int mon_text_open(struct inode *inode, struct file *file) int rc; mutex_lock(&mon_lock); - mbus = inode->u.generic_ip; + mbus = inode->i_private; ubus = mbus->u_bus; rp = kzalloc(sizeof(struct mon_reader_text), GFP_KERNEL); @@ -401,7 +401,7 @@ static int mon_text_release(struct inode *inode, struct file *file) struct mon_event_text *ep; mutex_lock(&mon_lock); - mbus = inode->u.generic_ip; + mbus = inode->i_private; if (mbus->nreaders <= 0) { printk(KERN_ERR TAG ": consistency error on close\n"); diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index c81d6b8c282..d39d2acd9b3 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -241,7 +241,7 @@ static void autofs_read_inode(struct inode *inode) inode->i_op = &autofs_symlink_inode_operations; sl = &sbi->symlink[n]; - inode->u.generic_ip = sl; + inode->i_private = sl; inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime; inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index 52e8772b066..c74f2eb6577 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -15,7 +15,7 @@ /* Nothing to release.. */ static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd) { - char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data; + char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data; nd_set_link(nd, s); return NULL; } diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 34ebbc191e4..6759b9839ce 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -517,7 +517,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) static void bm_clear_inode(struct inode *inode) { - kfree(inode->u.generic_ip); + kfree(inode->i_private); } static void kill_node(Node *e) @@ -545,7 +545,7 @@ static void kill_node(Node *e) static ssize_t bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) { - Node *e = file->f_dentry->d_inode->u.generic_ip; + Node *e = file->f_dentry->d_inode->i_private; loff_t pos = *ppos; ssize_t res; char *page; @@ -579,7 +579,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct dentry *root; - Node *e = file->f_dentry->d_inode->u.generic_ip; + Node *e = file->f_dentry->d_inode->i_private; int res = parse_command(buffer, count); switch (res) { @@ -646,7 +646,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, } e->dentry = dget(dentry); - inode->u.generic_ip = e; + inode->i_private = e; inode->i_fop = &bm_entry_operations; d_instantiate(dentry, inode); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index e4b430552c8..bf3901ab174 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -32,8 +32,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf, static int default_open(struct inode *inode, struct file *file) { - if (inode->u.generic_ip) - file->private_data = inode->u.generic_ip; + if (inode->i_private) + file->private_data = inode->i_private; return 0; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 3ca268d2e5a..717f4821ed0 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -168,7 +168,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, * directory dentry if set. If this paramater is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later - * on. The inode.u.generic_ip pointer will point to this value on + * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. @@ -209,7 +209,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode, if (dentry->d_inode) { if (data) - dentry->d_inode->u.generic_ip = data; + dentry->d_inode->i_private = data; if (fops) dentry->d_inode->i_fop = fops; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index f7aef5bb584..5bf06a10ddd 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -177,7 +177,7 @@ int devpts_pty_new(struct tty_struct *tty) inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); - inode->u.generic_ip = tty; + inode->i_private = tty; dentry = get_node(number); if (!IS_ERR(dentry) && !dentry->d_inode) @@ -196,7 +196,7 @@ struct tty_struct *devpts_get_tty(int number) tty = NULL; if (!IS_ERR(dentry)) { if (dentry->d_inode) - tty = dentry->d_inode->u.generic_ip; + tty = dentry->d_inode->i_private; dput(dentry); } diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h index d35979a5874..c8a92652612 100644 --- a/fs/freevxfs/vxfs.h +++ b/fs/freevxfs/vxfs.h @@ -252,7 +252,7 @@ enum { * Get filesystem private data from VFS inode. */ #define VXFS_INO(ip) \ - ((struct vxfs_inode_info *)(ip)->u.generic_ip) + ((struct vxfs_inode_info *)(ip)->i_private) /* * Get filesystem private data from VFS superblock. diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ca6a3971477..32a82ed108e 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -243,7 +243,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) ip->i_blocks = vip->vii_blocks; ip->i_generation = vip->vii_gen; - ip->u.generic_ip = (void *)vip; + ip->i_private = vip; } @@ -338,5 +338,5 @@ vxfs_read_inode(struct inode *ip) void vxfs_clear_inode(struct inode *ip) { - kmem_cache_free(vxfs_inode_cachep, ip->u.generic_ip); + kmem_cache_free(vxfs_inode_cachep, ip->i_private); } diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 46fe60b2da2..79ec1f23d4d 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file) { struct fuse_conn *fc; mutex_lock(&fuse_mutex); - fc = file->f_dentry->d_inode->u.generic_ip; + fc = file->f_dentry->d_inode->i_private; if (fc) fc = fuse_conn_get(fc); mutex_unlock(&fuse_mutex); @@ -98,7 +98,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, inode->i_op = iop; inode->i_fop = fop; inode->i_nlink = nlink; - inode->u.generic_ip = fc; + inode->i_private = fc; d_add(dentry, inode); return dentry; } @@ -150,7 +150,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) for (i = fc->ctl_ndents - 1; i >= 0; i--) { struct dentry *dentry = fc->ctl_dentry[i]; - dentry->d_inode->u.generic_ip = NULL; + dentry->d_inode->i_private = NULL; d_drop(dentry); dput(dentry); } diff --git a/fs/inode.c b/fs/inode.c index 0bf9f0444a9..77e25479202 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -163,7 +163,7 @@ static struct inode *alloc_inode(struct super_block *sb) bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; mapping->backing_dev_info = bdi; } - memset(&inode->u, 0, sizeof(inode->u)); + inode->i_private = 0; inode->i_mapping = mapping; } return inode; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index b59553d28d1..7358ef87f16 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -369,7 +369,7 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode, f = jffs_find_file(c, raw_inode->ino); - inode->u.generic_ip = (void *)f; + inode->i_private = (void *)f; insert_inode_hash(inode); return inode; @@ -442,7 +442,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry, }); result = -ENOTDIR; - if (!(old_dir_f = (struct jffs_file *)old_dir->u.generic_ip)) { + if (!(old_dir_f = old_dir->i_private)) { D(printk("jffs_rename(): Old dir invalid.\n")); goto jffs_rename_end; } @@ -456,7 +456,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry, /* Find the new directory. */ result = -ENOTDIR; - if (!(new_dir_f = (struct jffs_file *)new_dir->u.generic_ip)) { + if (!(new_dir_f = new_dir->i_private)) { D(printk("jffs_rename(): New dir invalid.\n")); goto jffs_rename_end; } @@ -593,7 +593,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) } else { ddino = ((struct jffs_file *) - inode->u.generic_ip)->pino; + inode->i_private)->pino; } D3(printk("jffs_readdir(): \"..\" %u\n", ddino)); if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) { @@ -604,7 +604,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) } filp->f_pos++; } - f = ((struct jffs_file *)inode->u.generic_ip)->children; + f = ((struct jffs_file *)inode->i_private)->children; j = 2; while(f && (f->deleted || j++ < filp->f_pos )) { @@ -668,7 +668,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) } r = -EACCES; - if (!(d = (struct jffs_file *)dir->u.generic_ip)) { + if (!(d = (struct jffs_file *)dir->i_private)) { D(printk("jffs_lookup(): No such inode! (%lu)\n", dir->i_ino)); goto jffs_lookup_end; @@ -739,7 +739,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page) unsigned long read_len; int result; struct inode *inode = (struct inode*)page->mapping->host; - struct jffs_file *f = (struct jffs_file *)inode->u.generic_ip; + struct jffs_file *f = (struct jffs_file *)inode->i_private; struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info; int r; loff_t offset; @@ -828,7 +828,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode) }); lock_kernel(); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; ASSERT(if (!dir_f) { printk(KERN_ERR "jffs_mkdir(): No reference to a " @@ -972,7 +972,7 @@ jffs_remove(struct inode *dir, struct dentry *dentry, int type) kfree(_name); }); - dir_f = (struct jffs_file *) dir->u.generic_ip; + dir_f = dir->i_private; c = dir_f->c; result = -ENOENT; @@ -1082,7 +1082,7 @@ jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) if (!old_valid_dev(rdev)) return -EINVAL; lock_kernel(); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; c = dir_f->c; D3(printk (KERN_NOTICE "mknod(): down biglock\n")); @@ -1186,7 +1186,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) kfree(_symname); }); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; ASSERT(if (!dir_f) { printk(KERN_ERR "jffs_symlink(): No reference to a " "jffs_file struct in inode.\n"); @@ -1289,7 +1289,7 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode, kfree(s); }); - dir_f = (struct jffs_file *)dir->u.generic_ip; + dir_f = dir->i_private; ASSERT(if (!dir_f) { printk(KERN_ERR "jffs_create(): No reference to a " "jffs_file struct in inode.\n"); @@ -1403,9 +1403,9 @@ jffs_file_write(struct file *filp, const char *buf, size_t count, goto out_isem; } - if (!(f = (struct jffs_file *)inode->u.generic_ip)) { - D(printk("jffs_file_write(): inode->u.generic_ip = 0x%p\n", - inode->u.generic_ip)); + if (!(f = inode->i_private)) { + D(printk("jffs_file_write(): inode->i_private = 0x%p\n", + inode->i_private)); goto out_isem; } @@ -1693,7 +1693,7 @@ jffs_read_inode(struct inode *inode) mutex_unlock(&c->fmc->biglock); return; } - inode->u.generic_ip = (void *)f; + inode->i_private = f; inode->i_mode = f->mode; inode->i_nlink = f->nlink; inode->i_uid = f->uid; @@ -1748,7 +1748,7 @@ jffs_delete_inode(struct inode *inode) lock_kernel(); inode->i_size = 0; inode->i_blocks = 0; - inode->u.generic_ip = NULL; + inode->i_private = NULL; clear_inode(inode); if (inode->i_nlink == 0) { c = (struct jffs_control *) inode->i_sb->s_fs_info; diff --git a/fs/libfs.c b/fs/libfs.c index ac02ea602c3..2751793beea 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -547,7 +547,7 @@ int simple_attr_open(struct inode *inode, struct file *file, attr->get = get; attr->set = set; - attr->data = inode->u.generic_ip; + attr->data = inode->i_private; attr->fmt = fmt; mutex_init(&attr->mutex); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index de887063dcf..8801e41afe8 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2052,7 +2052,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) mlog_errno(ret); goto out; } - osb = (struct ocfs2_super *) inode->u.generic_ip; + osb = inode->i_private; ocfs2_get_dlm_debug(osb->osb_dlm_debug); priv->p_dlm_debug = osb->osb_dlm_debug; INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1d3e601ece7..4f77ec9c335 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -554,9 +554,7 @@ struct inode { atomic_t i_writecount; void *i_security; - union { - void *generic_ip; - } u; + void *i_private; /* fs or device private pointer */ #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif diff --git a/kernel/relay.c b/kernel/relay.c index 33345e73485..85786ff2a4f 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -669,7 +669,7 @@ EXPORT_SYMBOL_GPL(relay_flush); */ static int relay_file_open(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = inode->u.generic_ip; + struct rchan_buf *buf = inode->i_private; kref_get(&buf->kref); filp->private_data = buf; diff --git a/security/inode.c b/security/inode.c index 47eb63480da..176aacea8ca 100644 --- a/security/inode.c +++ b/security/inode.c @@ -44,8 +44,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf, static int default_open(struct inode *inode, struct file *file) { - if (inode->u.generic_ip) - file->private_data = inode->u.generic_ip; + if (inode->i_private) + file->private_data = inode->i_private; return 0; } @@ -194,7 +194,7 @@ static int create_by_name(const char *name, mode_t mode, * directory dentry if set. If this paramater is NULL, then the * file will be created in the root of the securityfs filesystem. * @data: a pointer to something that the caller will want to get to later - * on. The inode.u.generic_ip pointer will point to this value on + * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. @@ -240,7 +240,7 @@ struct dentry *securityfs_create_file(const char *name, mode_t mode, if (fops) dentry->d_inode->i_fop = fops; if (data) - dentry->d_inode->u.generic_ip = data; + dentry->d_inode->i_private = data; } exit: return dentry; -- cgit v1.2.3-70-g09d2 From ba52de123d454b57369f291348266d86f4b35070 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Sep 2006 01:50:49 -0700 Subject: [PATCH] inode-diet: Eliminate i_blksize from the inode structure This eliminates the i_blksize field from struct inode. Filesystems that want to provide a per-inode st_blksize can do so by providing their own getattr routine instead of using the generic_fillattr() function. Note that some filesystems were providing pretty much random (and incorrect) values for i_blksize. [bunk@stusta.de: cleanup] [akpm@osdl.org: generic_fillattr() fix] Signed-off-by: "Theodore Ts'o" Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 1 - arch/s390/hypfs/inode.c | 1 - drivers/block/loop.c | 7 +++++-- drivers/infiniband/hw/ipath/ipath_fs.c | 1 - drivers/isdn/capi/capifs.c | 2 -- drivers/misc/ibmasm/ibmasmfs.c | 1 - drivers/oprofile/oprofilefs.c | 1 - drivers/usb/core/inode.c | 1 - drivers/usb/gadget/inode.c | 1 - fs/9p/vfs_inode.c | 4 +--- fs/adfs/inode.c | 1 - fs/afs/inode.c | 1 - fs/autofs/inode.c | 1 - fs/autofs4/inode.c | 1 - fs/befs/linuxvfs.c | 1 - fs/bfs/dir.c | 2 +- fs/bfs/inode.c | 1 - fs/binfmt_misc.c | 1 - fs/cifs/cifsfs.c | 1 - fs/cifs/readdir.c | 5 ++--- fs/coda/coda_linux.c | 2 -- fs/configfs/inode.c | 1 - fs/cramfs/inode.c | 1 - fs/debugfs/inode.c | 1 - fs/devpts/inode.c | 2 -- fs/eventpoll.c | 1 - fs/ext2/ialloc.c | 1 - fs/ext2/inode.c | 1 - fs/ext3/ialloc.c | 1 - fs/ext3/inode.c | 3 --- fs/fat/inode.c | 3 --- fs/freevxfs/vxfs_inode.c | 1 - fs/fuse/inode.c | 1 - fs/hfs/inode.c | 2 -- fs/hfsplus/inode.c | 2 -- fs/hostfs/hostfs_kern.c | 1 - fs/hpfs/inode.c | 1 - fs/hppfs/hppfs_kern.c | 1 - fs/hugetlbfs/inode.c | 1 - fs/isofs/inode.c | 3 +-- fs/jffs/inode-v23.c | 2 -- fs/jffs2/fs.c | 2 -- fs/jfs/jfs_extent.c | 2 +- fs/jfs/jfs_imap.c | 1 - fs/jfs/jfs_inode.c | 1 - fs/jfs/jfs_metapage.c | 2 +- fs/libfs.c | 2 -- fs/minix/bitmap.c | 2 +- fs/minix/inode.c | 4 ++-- fs/ncpfs/inode.c | 1 - fs/nfs/inode.c | 4 ---- fs/ntfs/inode.c | 4 ---- fs/ntfs/mft.c | 5 ----- fs/ocfs2/dlm/dlmfs.c | 2 -- fs/ocfs2/inode.c | 4 ---- fs/pipe.c | 1 - fs/qnx4/inode.c | 1 - fs/ramfs/inode.c | 1 - fs/reiserfs/inode.c | 4 ---- fs/smbfs/inode.c | 2 -- fs/smbfs/proc.c | 1 - fs/stat.c | 3 ++- fs/sysfs/inode.c | 1 - fs/sysv/ialloc.c | 2 +- fs/sysv/inode.c | 2 +- fs/udf/ialloc.c | 1 - fs/udf/inode.c | 2 -- fs/ufs/ialloc.c | 1 - fs/ufs/inode.c | 1 - fs/xfs/linux-2.6/xfs_super.c | 1 - fs/xfs/linux-2.6/xfs_vnode.c | 1 - include/linux/fs.h | 1 - include/linux/nfsd/nfsfh.h | 10 ++-------- include/linux/smb.h | 1 - ipc/mqueue.c | 1 - kernel/cpuset.c | 1 - mm/shmem.c | 1 - net/sunrpc/rpc_pipe.c | 1 - security/inode.c | 1 - security/selinux/selinuxfs.c | 1 - 80 files changed, 21 insertions(+), 125 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index b837f12a84a..3950ddccb2c 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -82,7 +82,6 @@ spufs_new_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; out: diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 8735024d235..813fc21358f 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -91,7 +91,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_mode = mode; ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; - ret->i_blksize = PAGE_CACHE_SIZE; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; if (mode & S_IFDIR) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7b3b94ddddc..c774121684d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -662,7 +662,8 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p) mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; - lo->lo_blocksize = mapping->host->i_blksize; + lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? + mapping->host->i_bdev->bd_block_size : PAGE_SIZE; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); complete(&p->wait); @@ -794,7 +795,9 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; - lo_blocksize = inode->i_blksize; + lo_blocksize = S_ISBLK(inode->i_mode) ? + inode->i_bdev->bd_block_size : PAGE_SIZE; + error = 0; } else { goto out_putf; diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 055cdd089b2..c8a8af0fe47 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -61,7 +61,6 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 9ea6bd0ddc3..2dd1b57b0ba 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -104,7 +104,6 @@ capifs_fill_super(struct super_block *s, void *data, int silent) inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; - inode->i_blksize = 1024; inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; @@ -149,7 +148,6 @@ void capifs_new_ncci(unsigned int number, dev_t device) if (!inode) return; inode->i_ino = number+2; - inode->i_blksize = 1024; inode->i_uid = config.setuid ? config.uid : current->fsuid; inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 0e909b61722..b99dc507de2 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -147,7 +147,6 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) if (ret) { ret->i_mode = mode; ret->i_uid = ret->i_gid = 0; - ret->i_blksize = PAGE_CACHE_SIZE; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index deb37354785..5756401fb15 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -31,7 +31,6 @@ static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 482f253085e..58b4b101212 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -249,7 +249,6 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index ffaa8c1afad..2a7162d8979 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1966,7 +1966,6 @@ gadgetfs_make_inode (struct super_block *sb, inode->i_mode = mode; inode->i_uid = default_uid; inode->i_gid = default_gid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index eae50c9d6dc..7a7ec2d1d2f 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -204,7 +204,6 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = sb->s_blocksize; inode->i_blocks = 0; inode->i_rdev = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; @@ -950,9 +949,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, inode->i_size = stat->length; - inode->i_blksize = sb->s_blocksize; inode->i_blocks = - (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits; + (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } /** diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 534f3eecc98..7e7a04be127 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -269,7 +269,6 @@ adfs_iget(struct super_block *sb, struct object_info *obj) inode->i_ino = obj->file_id; inode->i_size = obj->size; inode->i_nlink = 2; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4ebb30a50ed..6f37754906c 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -72,7 +72,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode) inode->i_ctime.tv_sec = vnode->status.mtime_server; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_version = vnode->fid.unique; inode->i_mapping->a_ops = &afs_fs_aops; diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index d39d2acd9b3..2c9759baad6 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -216,7 +216,6 @@ static void autofs_read_inode(struct inode *inode) inode->i_nlink = 2; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; - inode->i_blksize = 1024; if ( ino == AUTOFS_ROOT_INO ) { inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 11a6a9ae51b..800ce876cae 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -447,7 +447,6 @@ struct inode *autofs4_get_inode(struct super_block *sb, inode->i_uid = 0; inode->i_gid = 0; } - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index f6676fbe948..57020c7a7e6 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -365,7 +365,6 @@ befs_read_inode(struct inode *inode) inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */ inode->i_ctime = inode->i_mtime; inode->i_atime = inode->i_mtime; - inode->i_blksize = befs_sb->block_size; befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num); befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent); diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 26fad962173..dcf04cb1328 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -102,7 +102,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_uid = current->fsuid; inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; inode->i_op = &bfs_file_inops; inode->i_fop = &bfs_file_operations; inode->i_mapping->a_ops = &bfs_aops; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8fc2e8e49db..ed27ffb3459 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -76,7 +76,6 @@ static void bfs_read_inode(struct inode * inode) inode->i_size = BFS_FILESIZE(di); inode->i_blocks = BFS_FILEBLOCKS(di); if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); - inode->i_blksize = PAGE_SIZE; inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 6759b9839ce..66ba137f866 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -507,7 +507,6 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 4197a5043f1..22bcf4d7e7a 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -253,7 +253,6 @@ cifs_alloc_inode(struct super_block *sb) file data or metadata */ cifs_inode->clientCanCacheRead = FALSE; cifs_inode->clientCanCacheAll = FALSE; - cifs_inode->vfs_inode.i_blksize = CIFS_MAX_MSGSIZE; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; INIT_LIST_HEAD(&cifs_inode->openFileList); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 9aeb58a7d36..b27b34537bf 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -216,10 +216,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, if (allocation_size < end_of_file) cFYI(1, ("May be sparse file, allocation less than file size")); - cFYI(1, ("File Size %ld and blocks %llu and blocksize %ld", + cFYI(1, ("File Size %ld and blocks %llu", (unsigned long)tmp_inode->i_size, - (unsigned long long)tmp_inode->i_blocks, - tmp_inode->i_blksize)); + (unsigned long long)tmp_inode->i_blocks)); if (S_ISREG(tmp_inode->i_mode)) { cFYI(1, ("File inode")); tmp_inode->i_op = &cifs_file_inode_ops; diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 5597080cb81..95a54253c04 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -110,8 +110,6 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) inode->i_nlink = attr->va_nlink; if (attr->va_size != -1) inode->i_size = attr->va_size; - if (attr->va_blocksize != -1) - inode->i_blksize = attr->va_blocksize; if (attr->va_size != -1) inode->i_blocks = (attr->va_size + 511) >> 9; if (attr->va_atime.tv_sec != -1) diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5047e6a7581..fb18917954a 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -135,7 +135,6 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) { struct inode * inode = new_inode(configfs_sb); if (inode) { - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &configfs_aops; inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index d09b6777c41..ad96b699071 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -73,7 +73,6 @@ static int cramfs_iget5_set(struct inode *inode, void *opaque) inode->i_uid = cramfs_inode->uid; inode->i_size = cramfs_inode->size; inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_gid = cramfs_inode->gid; /* Struct copy intentional */ inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 717f4821ed0..269e649e6dc 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -40,7 +40,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 5bf06a10ddd..5f7b5a6025b 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -113,7 +113,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent) inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; - inode->i_blksize = 1024; inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; @@ -172,7 +171,6 @@ int devpts_pty_new(struct tty_struct *tty) return -ENOMEM; inode->i_ino = number+2; - inode->i_blksize = 1024; inode->i_uid = config.setuid ? config.uid : current->fsuid; inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3a3567433b9..8d544334bcd 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1590,7 +1590,6 @@ static struct inode *ep_eventpoll_inode(void) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blksize = PAGE_SIZE; return inode; eexit_1: diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 695f69ccf90..2cb545bf0f3 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -574,7 +574,6 @@ got: inode->i_mode = mode; inode->i_ino = ino; - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; memset(ei->i_data, 0, sizeof(ei->i_data)); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index fb4d3220eb8..dd4e14c221e 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1094,7 +1094,6 @@ void ext2_read_inode (struct inode * inode) brelse (bh); goto bad_inode; } - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 1e4ded8aa3c..e45dbd65173 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -559,7 +559,6 @@ got: inode->i_ino = ino; /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c9fc15f8b60..dcf4f1dd108 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2632,9 +2632,6 @@ void ext3_read_inode(struct inode * inode) * recovery code: that's fine, we're about to complete * the process of deleting those. */ } - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size - * (for stat), not the fs block - * size */ inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); #ifdef EXT3_FRAGMENTS diff --git a/fs/fat/inode.c b/fs/fat/inode.c index e1035a59066..ab96ae82375 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -370,8 +370,6 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_flags |= S_IMMUTABLE; } MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; - /* this is as close to the truth as we can get ... */ - inode->i_blksize = sbi->cluster_size; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; inode->i_mtime.tv_sec = @@ -1131,7 +1129,6 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_start = 0; inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry); } - inode->i_blksize = sbi->cluster_size; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; MSDOS_I(inode)->i_logstart = 0; diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 32a82ed108e..4786d51ad3b 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -239,7 +239,6 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) ip->i_ctime.tv_nsec = 0; ip->i_mtime.tv_nsec = 0; - ip->i_blksize = PAGE_SIZE; ip->i_blocks = vip->vii_blocks; ip->i_generation = vip->vii_gen; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7d25092262a..cb7cadb0b79 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -118,7 +118,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) inode->i_uid = attr->uid; inode->i_gid = attr->gid; i_size_write(inode, attr->size); - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = attr->blocks; inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 315cf44a90b..d05641c35fc 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -154,7 +154,6 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) inode->i_gid = current->fsgid; inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blksize = HFS_SB(sb)->alloc_blksz; HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; HFS_I(inode)->fs_blocks = 0; @@ -284,7 +283,6 @@ static int hfs_read_inode(struct inode *inode, void *data) inode->i_uid = hsb->s_uid; inode->i_gid = hsb->s_gid; inode->i_nlink = 1; - inode->i_blksize = HFS_SB(inode->i_sb)->alloc_blksz; if (idata->key) HFS_I(inode)->cat_key = *idata->key; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 924ecdef809..0eb1a609266 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -304,7 +304,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode) inode->i_gid = current->fsgid; inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blksize = HFSPLUS_SB(sb).alloc_blksz; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); init_MUTEX(&HFSPLUS_I(inode).extents_lock); atomic_set(&HFSPLUS_I(inode).opencnt, 0); @@ -407,7 +406,6 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); HFSPLUS_I(inode).dev = 0; - inode->i_blksize = HFSPLUS_SB(inode->i_sb).alloc_blksz; if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index b82e3d9c879..322e876c35e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -156,7 +156,6 @@ static int read_name(struct inode *ino, char *name) ino->i_mode = i_mode; ino->i_nlink = i_nlink; ino->i_size = i_size; - ino->i_blksize = i_blksize; ino->i_blocks = i_blocks; return(0); } diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 56f2c338c4d..bcf6ee36e06 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i) i->i_gid = hpfs_sb(sb)->sb_gid; i->i_mode = hpfs_sb(sb)->sb_mode; hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv; - i->i_blksize = 512; i->i_size = -1; i->i_blocks = -1; diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 3a9bdf58166..dcb6d2e988b 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -152,7 +152,6 @@ static void hppfs_read_inode(struct inode *ino) ino->i_mode = proc_ino->i_mode; ino->i_nlink = proc_ino->i_nlink; ino->i_size = proc_ino->i_size; - ino->i_blksize = proc_ino->i_blksize; ino->i_blocks = proc_ino->i_blocks; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c3920c96dad..e025a31b4c6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -357,7 +357,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_mode = mode; inode->i_uid = uid; inode->i_gid = gid; - inode->i_blksize = HPAGE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 10e47897bac..4527692f432 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1235,7 +1235,7 @@ static void isofs_read_inode(struct inode *inode) } inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; ei->i_format_parm[0] = 0; ei->i_format_parm[1] = 0; @@ -1291,7 +1291,6 @@ static void isofs_read_inode(struct inode *inode) isonum_711 (de->ext_attr_length)); /* Set the number of blocks for stat() - should be done before RR */ - inode->i_blksize = PAGE_CACHE_SIZE; /* For stat() only */ inode->i_blocks = (inode->i_size + 511) >> 9; /* diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 7358ef87f16..f5cf9c93e24 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -364,7 +364,6 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode, inode->i_ctime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + 511) >> 9; f = jffs_find_file(c, raw_inode->ino); @@ -1706,7 +1705,6 @@ jffs_read_inode(struct inode *inode) inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + 511) >> 9; if (S_ISREG(inode->i_mode)) { inode->i_op = &jffs_file_inode_operations; diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 4780f82825d..72d9909d95f 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -263,7 +263,6 @@ void jffs2_read_inode (struct inode *inode) inode->i_nlink = f->inocache->nlink; - inode->i_blksize = PAGE_SIZE; inode->i_blocks = (inode->i_size + 511) >> 9; switch (inode->i_mode & S_IFMT) { @@ -449,7 +448,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); - inode->i_blksize = PAGE_SIZE; inode->i_blocks = 0; inode->i_size = 0; diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 4d52593a5fc..4c74f0944f7 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -468,7 +468,7 @@ int extRecord(struct inode *ip, xad_t * xp) int extFill(struct inode *ip, xad_t * xp) { int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; - s64 blkno = offsetXAD(xp) >> ip->i_blksize; + s64 blkno = offsetXAD(xp) >> ip->i_blkbits; // assert(ISSPARSE(ip)); diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index ccbe60aff83..369d7f39c04 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3115,7 +3115,6 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); - ip->i_blksize = ip->i_sb->s_blocksize; ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); ip->i_generation = le32_to_cpu(dip->di_gen); diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 495df402916..bffaca9ae3a 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -115,7 +115,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) } jfs_inode->mode2 |= mode; - inode->i_blksize = sb->s_blocksize; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; jfs_inode->otime = inode->i_ctime.tv_sec; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index e1e0a6e6ebd..f5afc129d6b 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -257,7 +257,7 @@ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, int rc = 0; int xflag; s64 xaddr; - sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >> + sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_blkbits; if (lblock >= file_blocks) diff --git a/fs/libfs.c b/fs/libfs.c index 2751793beea..8db5afb7b0a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -383,7 +383,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files return -ENOMEM; inode->i_mode = S_IFDIR | 0755; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_op = &simple_dir_inode_operations; @@ -405,7 +404,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files goto out; inode->i_mode = S_IFREG | files->mode; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_fop = files->ops; diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 4a6abc49418..df6b1075b54 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -254,7 +254,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; inode->i_ino = j; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u)); insert_inode_hash(inode); mark_inode_dirty(inode); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 826b9d83065..c11a4b9fb86 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -396,7 +396,7 @@ static void V1_minix_read_inode(struct inode * inode) inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; for (i = 0; i < 9; i++) minix_inode->u.i1_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); @@ -429,7 +429,7 @@ static void V2_minix_read_inode(struct inode * inode) inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; for (i = 0; i < 10; i++) minix_inode->u.i2_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 8244710e97d..42e3bef270c 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -223,7 +223,6 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo) inode->i_nlink = 1; inode->i_uid = server->m.uid; inode->i_gid = server->m.gid; - inode->i_blksize = NCP_BLOCK_SIZE; ncp_update_dates(inode, &nwinfo->i); ncp_update_inode(inode, nwinfo); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 931f52a1957..bc9376ca86c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -277,10 +277,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - inode->i_blksize = inode->i_sb->s_blocksize; } else { inode->i_blocks = fattr->du.nfs2.blocks; - inode->i_blksize = fattr->du.nfs2.blocksize; } nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; @@ -969,10 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - inode->i_blksize = inode->i_sb->s_blocksize; } else { inode->i_blocks = fattr->du.nfs2.blocks; - inode->i_blksize = fattr->du.nfs2.blocksize; } if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 31852121b3f..933dbd89c2a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -556,8 +556,6 @@ static int ntfs_read_locked_inode(struct inode *vi) /* Setup the generic vfs inode parts now. */ - /* This is the optimal IO size (for stat), not the fs block size. */ - vi->i_blksize = PAGE_CACHE_SIZE; /* * This is for checking whether an inode has changed w.r.t. a file so * that the file can be updated if necessary (compare with f_version). @@ -1234,7 +1232,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_blksize = base_vi->i_blksize; vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; @@ -1504,7 +1501,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) ni = NTFS_I(vi); base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_blksize = base_vi->i_blksize; vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 578fb3d5e80..584260fd684 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2637,11 +2637,6 @@ mft_rec_already_initialized: goto undo_mftbmp_alloc; } vi->i_ino = bit; - /* - * This is the optimal IO size (for stat), not the fs block - * size. - */ - vi->i_blksize = PAGE_CACHE_SIZE; /* * This is for checking whether an inode has changed w.r.t. a * file so that the file can be updated if necessary (compare diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 0ff0898a0b9..0368c640218 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -335,7 +335,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; @@ -362,7 +361,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent, inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 69d3db56916..16e8e74dc96 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -269,7 +269,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_mode = le16_to_cpu(fe->i_mode); inode->i_uid = le32_to_cpu(fe->i_uid); inode->i_gid = le32_to_cpu(fe->i_gid); - inode->i_blksize = (u32)osb->s_clustersize; /* Fast symlinks will have i_size but no allocated clusters. */ if (S_ISLNK(inode->i_mode) && !fe->i_clusters) @@ -1258,8 +1257,6 @@ leave: void ocfs2_refresh_inode(struct inode *inode, struct ocfs2_dinode *fe) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); @@ -1270,7 +1267,6 @@ void ocfs2_refresh_inode(struct inode *inode, inode->i_uid = le32_to_cpu(fe->i_uid); inode->i_gid = le32_to_cpu(fe->i_gid); inode->i_mode = le16_to_cpu(fe->i_mode); - inode->i_blksize = (u32) osb->s_clustersize; if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) inode->i_blocks = 0; else diff --git a/fs/pipe.c b/fs/pipe.c index 20352573e02..f3b6f71e9d0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -879,7 +879,6 @@ static struct inode * get_pipe_inode(void) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blksize = PAGE_SIZE; return inode; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fddbd61c68d..5a41db2a218 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -496,7 +496,6 @@ static void qnx4_read_inode(struct inode *inode) inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->di_ctime); inode->i_ctime.tv_nsec = 0; inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size); - inode->i_blksize = QNX4_DIR_ENTRY_SIZE; memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); if (S_ISREG(inode->i_mode)) { diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index b9677335cc8..bc0e5166242 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -58,7 +58,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 52f1e213654..8810fda0da4 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -17,8 +17,6 @@ #include #include -extern int reiserfs_default_io_size; /* default io size devuned in super.c */ - static int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); static int reiserfs_prepare_write(struct file *f, struct page *page, @@ -1122,7 +1120,6 @@ static void init_inode(struct inode *inode, struct path *path) ih = PATH_PITEM_HEAD(path); copy_key(INODE_PKEY(inode), &(ih->ih_key)); - inode->i_blksize = reiserfs_default_io_size; INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); REISERFS_I(inode)->i_flags = 0; @@ -1877,7 +1874,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, } // these do not go to on-disk stat data inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); - inode->i_blksize = reiserfs_default_io_size; // store in in-core inode the key of stat data and version all // object items will have (directory items will have old offset diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 92cf60aa612..2c122ee83ad 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -166,7 +166,6 @@ smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr) fattr->f_mtime = inode->i_mtime; fattr->f_ctime = inode->i_ctime; fattr->f_atime = inode->i_atime; - fattr->f_blksize= inode->i_blksize; fattr->f_blocks = inode->i_blocks; fattr->attr = SMB_I(inode)->attr; @@ -200,7 +199,6 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr) inode->i_uid = fattr->f_uid; inode->i_gid = fattr->f_gid; inode->i_ctime = fattr->f_ctime; - inode->i_blksize= fattr->f_blksize; inode->i_blocks = fattr->f_blocks; inode->i_size = fattr->f_size; inode->i_mtime = fattr->f_mtime; diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index c3495059889..40e174db987 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -1826,7 +1826,6 @@ smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr) fattr->f_nlink = 1; fattr->f_uid = server->mnt->uid; fattr->f_gid = server->mnt->gid; - fattr->f_blksize = SMB_ST_BLKSIZE; fattr->f_unix = 0; } diff --git a/fs/stat.c b/fs/stat.c index 3a44dcf97da..60a31d5e596 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->ctime = inode->i_ctime; stat->size = i_size_read(inode); stat->blocks = inode->i_blocks; - stat->blksize = inode->i_blksize; + stat->blksize = (1 << inode->i_blkbits); } EXPORT_SYMBOL(generic_fillattr); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index fd7cd5f843d..e79e38d52c0 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -125,7 +125,6 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) { struct inode * inode = new_inode(sysfs_sb); if (inode) { - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 9b585d1081c..115ab0d6f4b 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -170,7 +170,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) inode->i_uid = current->fsuid; inode->i_ino = fs16_to_cpu(sbi, ino); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data)); SYSV_I(inode)->i_dir_start_lookup = 0; insert_inode_hash(inode); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 58b2d22142b..d63c5e48b05 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -201,7 +201,7 @@ static void sysv_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; - inode->i_blocks = inode->i_blksize = 0; + inode->i_blocks = 0; si = SYSV_I(inode); for (block = 0; block < 10+1+1+1; block++) diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index d1d38238ce6..8206983f2eb 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -121,7 +121,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) UDF_I_LOCATION(inode).logicalBlockNum = block; UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0); - inode->i_blksize = PAGE_SIZE; inode->i_blocks = 0; UDF_I_LENEATTR(inode) = 0; UDF_I_LENALLOC(inode) = 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 605f5111b6d..b223b32db99 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -916,8 +916,6 @@ __udf_read_inode(struct inode *inode) * i_nlink = 1 * i_op = NULL; */ - inode->i_blksize = PAGE_SIZE; - bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident); if (!bh) diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 9501dcd3b21..2ad1259c6ec 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -255,7 +255,6 @@ cg_found: inode->i_gid = current->fsgid; inode->i_ino = cg * uspi->s_ipg + bit; - inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; ufsi->i_flags = UFS_I(dir)->i_flags; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 30c6e8a9446..ee1eaa6f4ec 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -741,7 +741,6 @@ void ufs_read_inode(struct inode * inode) ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); } - inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/ inode->i_version++; ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4754f342a5d..9df9ed37d21 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -171,7 +171,6 @@ xfs_revalidate_inode( break; } - inode->i_blksize = xfs_preferred_iosize(mp); inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); inode->i_blocks = diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 6628d96b6fd..553fa731ade 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -122,7 +122,6 @@ vn_revalidate_core( inode->i_blocks = vap->va_nblocks; inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; - inode->i_blksize = vap->va_blocksize; if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else diff --git a/include/linux/fs.h b/include/linux/fs.h index 192e69bb55b..8f74dfbb2ed 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -512,7 +512,6 @@ struct inode { struct timespec i_mtime; struct timespec i_ctime; unsigned int i_blkbits; - unsigned long i_blksize; unsigned long i_version; blkcnt_t i_blocks; unsigned short i_bytes; diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index f9edcd2ff3c..31a3cb617ce 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -269,14 +269,8 @@ fill_post_wcc(struct svc_fh *fhp) fhp->fh_post_uid = inode->i_uid; fhp->fh_post_gid = inode->i_gid; fhp->fh_post_size = inode->i_size; - if (inode->i_blksize) { - fhp->fh_post_blksize = inode->i_blksize; - fhp->fh_post_blocks = inode->i_blocks; - } else { - fhp->fh_post_blksize = BLOCK_SIZE; - /* how much do we care for accuracy with MinixFS? */ - fhp->fh_post_blocks = (inode->i_size+511) >> 9; - } + fhp->fh_post_blksize = BLOCK_SIZE; + fhp->fh_post_blocks = inode->i_blocks; fhp->fh_post_rdev[0] = htonl((u32)imajor(inode)); fhp->fh_post_rdev[1] = htonl((u32)iminor(inode)); fhp->fh_post_atime = inode->i_atime; diff --git a/include/linux/smb.h b/include/linux/smb.h index 6df3b150155..f098dff93f6 100644 --- a/include/linux/smb.h +++ b/include/linux/smb.h @@ -89,7 +89,6 @@ struct smb_fattr { struct timespec f_atime; struct timespec f_mtime; struct timespec f_ctime; - unsigned long f_blksize; unsigned long f_blocks; int f_unix; }; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index f8381f0660b..840f8a6fb85 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -115,7 +115,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index cff41511269..1b32c2c04c1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -289,7 +289,6 @@ static struct inode *cpuset_new_inode(mode_t mode) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info; diff --git a/mm/shmem.c b/mm/shmem.c index 0a8e29cf87e..eda907c3a86 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1351,7 +1351,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_mapping->a_ops = &shmem_aops; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index f65bea74734..700c6e061a0 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -488,7 +488,6 @@ rpc_get_inode(struct super_block *sb, int mode) return NULL; inode->i_mode = mode; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { diff --git a/security/inode.c b/security/inode.c index 176aacea8ca..49ee5152939 100644 --- a/security/inode.c +++ b/security/inode.c @@ -64,7 +64,6 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 00534c302ba..bab7b386cb8 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -771,7 +771,6 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) if (ret) { ret->i_mode = mode; ret->i_uid = ret->i_gid = 0; - ret->i_blksize = PAGE_CACHE_SIZE; ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } -- cgit v1.2.3-70-g09d2 From b89a81712f486e4f7a606987413e387605fdeaf4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 27 Sep 2006 01:51:04 -0700 Subject: [PATCH] sysctl: Allow /proc/sys without sys_sysctl Since sys_sysctl is deprecated start allow it to be compiled out. This should catch any remaining user space code that cares, and paves the way for further sysctl cleanups. [akpm@osdl.org: If sys_sysctl() is not compiled-in, emit a warning] Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/sys_ia32.c | 2 +- arch/mips/kernel/linux32.c | 4 +- arch/powerpc/kernel/sys_ppc32.c | 2 +- arch/s390/kernel/compat_linux.c | 2 +- arch/sparc64/kernel/sys_sparc32.c | 2 +- arch/x86_64/ia32/sys_ia32.c | 2 +- fs/Kconfig | 19 +++++++ init/Kconfig | 31 ++++++----- kernel/sysctl.c | 113 +++++++++++++------------------------- 9 files changed, 81 insertions(+), 96 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 6aa3c51619c..bddbd22706e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1942,7 +1942,7 @@ struct sysctl32 { unsigned int __unused[4]; }; -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL_SYSCALL asmlinkage long sys32_sysctl (struct sysctl32 __user *args) { diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index dc500e20cf1..43b1162d714 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -991,7 +991,7 @@ struct sysctl_args32 unsigned int __unused[4]; }; -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL_SYSCALL asmlinkage long sys32_sysctl(struct sysctl_args32 __user *args) { @@ -1032,7 +1032,7 @@ asmlinkage long sys32_sysctl(struct sysctl_args32 __user *args) return error; } -#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_SYSCTL_SYSCALL */ asmlinkage long sys32_newuname(struct new_utsname __user * name) { diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 2e292863e98..5e391fc2534 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -740,7 +740,7 @@ asmlinkage long compat_sys_umask(u32 mask) return sys_umask((int)mask); } -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL_SYSCALL struct __sysctl_args32 { u32 name; int nlen; diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 785c9f70ac9..91b2884fa5c 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -708,7 +708,7 @@ asmlinkage long sys32_sendfile64(int out_fd, int in_fd, return ret; } -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL_SYSCALL struct __sysctl_args32 { u32 name; int nlen; diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index c88ae23ce81..69444f266e2 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1016,7 +1016,7 @@ struct __sysctl_args32 { asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) { -#ifndef CONFIG_SYSCTL +#ifndef CONFIG_SYSCTL_SYSCALL return -ENOSYS; #else struct __sysctl_args32 tmp; diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index b0e82c7947d..f280d3665f4 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -648,7 +648,7 @@ sys32_pause(void) } -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL_SYSCALL struct sysctl_ia32 { unsigned int name; int nlen; diff --git a/fs/Kconfig b/fs/Kconfig index a27002668bd..d311198bba4 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -826,6 +826,25 @@ config PROC_VMCORE help Exports the dump image of crashed kernel in ELF format. +config PROC_SYSCTL + bool "Sysctl support (/proc/sys)" if EMBEDDED + depends on PROC_FS + select SYSCTL + default y + ---help--- + The sysctl interface provides a means of dynamically changing + certain kernel parameters and variables on the fly without requiring + a recompile of the kernel or reboot of the system. The primary + interface is through /proc/sys. If you say Y here a tree of + modifiable sysctl entries will be generated beneath the + /proc/sys directory. They are explained in the files + in . Note that enabling this + option will enlarge the kernel by at least 8 KB. + + As it is generally a good thing, you should say Y here unless + building a kernel for install/rescue disks or your system is very + limited in memory. + config SYSFS bool "sysfs file system support" if EMBEDDED default y diff --git a/init/Kconfig b/init/Kconfig index 9a7656f0b5e..4381006dd66 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -273,21 +273,24 @@ config UID16 This enables the legacy 16-bit UID syscall wrappers. config SYSCTL - bool "Sysctl support" if EMBEDDED - default y + bool + +config SYSCTL_SYSCALL + bool "Sysctl syscall support" + default n + select SYSCTL ---help--- - The sysctl interface provides a means of dynamically changing - certain kernel parameters and variables on the fly without requiring - a recompile of the kernel or reboot of the system. The primary - interface consists of a system call, but if you say Y to "/proc - file system support", a tree of modifiable sysctl entries will be - generated beneath the /proc/sys directory. They are explained in the - files in . Note that enabling this - option will enlarge the kernel by at least 8 KB. - - As it is generally a good thing, you should say Y here unless - building a kernel for install/rescue disks or your system is very - limited in memory. + Enable the deprecated sysctl system call. sys_sysctl uses + binary paths that have been found to be a major pain to maintain + and use. The interface in /proc/sys is now the primary and what + everyone uses. + + Nothing has been using the binary sysctl interface for some time + time now so nothing should break if you disable sysctl syscall + support, and you kernel will get marginally smaller. + + Unless you have an application that uses the sys_syscall interface + you should probably say N here. config KALLSYMS bool "Load all symbols for debugging/kksymoops" if EMBEDDED diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bcb3a181dbb..8bfa7d117c5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -137,8 +137,11 @@ extern int no_unaligned_warning; extern int max_lock_depth; #endif -static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, - ctl_table *, void **); +#ifdef CONFIG_SYSCTL_SYSCALL +static int parse_table(int __user *, int, void __user *, size_t __user *, + void __user *, size_t, ctl_table *, void **); +#endif + static int proc_doutsstring(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -165,7 +168,7 @@ int sysctl_legacy_va_layout; /* /proc declarations: */ -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); @@ -1166,12 +1169,13 @@ static void start_unregistering(struct ctl_table_header *p) void __init sysctl_init(void) { -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL register_proc_table(root_table, proc_sys_root, &root_table_header); init_irq_proc(); #endif } +#ifdef CONFIG_SYSCTL_SYSCALL int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1225,6 +1229,7 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) unlock_kernel(); return error; } +#endif /* CONFIG_SYSCTL_SYSCALL */ /* * ctl_perm does NOT grant the superuser all rights automatically, because @@ -1251,6 +1256,7 @@ static inline int ctl_perm(ctl_table *table, int op) return test_perm(table->mode, op); } +#ifdef CONFIG_SYSCTL_SYSCALL static int parse_table(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, @@ -1340,6 +1346,7 @@ int do_sysctl_strategy (ctl_table *table, } return 0; } +#endif /* CONFIG_SYSCTL_SYSCALL */ /** * register_sysctl_table - register a sysctl hierarchy @@ -1427,7 +1434,7 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); spin_unlock(&sysctl_lock); -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL register_proc_table(table, proc_sys_root, tmp); #endif return tmp; @@ -1445,18 +1452,31 @@ void unregister_sysctl_table(struct ctl_table_header * header) might_sleep(); spin_lock(&sysctl_lock); start_unregistering(header); -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL unregister_proc_table(header->ctl_table, proc_sys_root); #endif spin_unlock(&sysctl_lock); kfree(header); } +#else /* !CONFIG_SYSCTL */ +struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head) +{ + return NULL; +} + +void unregister_sysctl_table(struct ctl_table_header * table) +{ +} + +#endif /* CONFIG_SYSCTL */ + /* * /proc/sys support */ -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL /* Scan the sysctl entries in table and add them all into /proc */ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) @@ -2318,6 +2338,7 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_SYSCTL_SYSCALL /* * General sysctl support routines */ @@ -2460,11 +2481,19 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, return 1; } -#else /* CONFIG_SYSCTL */ +#else /* CONFIG_SYSCTL_SYSCALL */ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) { + static int msg_count; + + if (msg_count < 5) { + msg_count++; + printk(KERN_INFO + "warning: process `%s' used the removed sysctl " + "system call\n", current->comm); + } return -ENOSYS; } @@ -2496,73 +2525,7 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, return -ENOSYS; } -int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, - struct file *filp, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -struct ctl_table_header * register_sysctl_table(ctl_table * table, - int insert_at_head) -{ - return NULL; -} - -void unregister_sysctl_table(struct ctl_table_header * table) -{ -} - -#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_SYSCTL_SYSCALL */ /* * No sense putting this after each symbol definition, twice, -- cgit v1.2.3-70-g09d2 From c18258c6f0848f97e85287f6271c511a092bb784 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 27 Sep 2006 01:51:06 -0700 Subject: [PATCH] pid: Implement transfer_pid and use it to simplify de_thread In de_thread we move pids from one process to another, a rather ugly case. The function transfer_pid makes it clear what we are doing, and makes the action atomic. This is useful we ever want to atomically traverse the process group and session lists, in a rcu safe manner. Even if the atomic properties this change should be a win as transfer_pid should be less code to execute than executing both attach_pid and detach_pid, and this should make de_thread slightly smaller as only a single function call needs to be emitted. The only downside is that the code might be slower to execute as the odds are against transfer_pid being in cache. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 11 ++++------- include/linux/pid.h | 2 ++ kernel/pid.c | 9 +++++++++ 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/fs/exec.c b/fs/exec.c index 54135df2a96..b7aa3d6422d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -696,23 +696,20 @@ static int de_thread(struct task_struct *tsk) */ /* Become a process group leader with the old leader's pid. - * Note: The old leader also uses thispid until release_task + * The old leader becomes a thread of the this thread group. + * Note: The old leader also uses this pid until release_task * is called. Odd but simple and correct. */ detach_pid(current, PIDTYPE_PID); current->pid = leader->pid; attach_pid(current, PIDTYPE_PID, current->pid); - attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); - attach_pid(current, PIDTYPE_SID, current->signal->session); + transfer_pid(leader, current, PIDTYPE_PGID); + transfer_pid(leader, current, PIDTYPE_SID); list_replace_rcu(&leader->tasks, ¤t->tasks); current->group_leader = current; leader->group_leader = current; - /* Reduce leader to a thread */ - detach_pid(leader, PIDTYPE_PGID); - detach_pid(leader, PIDTYPE_SID); - current->exit_signal = SIGCHLD; BUG_ON(leader->exit_state != EXIT_ZOMBIE); diff --git a/include/linux/pid.h b/include/linux/pid.h index 29960b03bef..93da7e2d9f3 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -76,6 +76,8 @@ extern int FASTCALL(attach_pid(struct task_struct *task, enum pid_type type, int nr)); extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); +extern void FASTCALL(transfer_pid(struct task_struct *old, + struct task_struct *new, enum pid_type)); /* * look up a PID in the hash table. Must be called with the tasklist_lock diff --git a/kernel/pid.c b/kernel/pid.c index 93e212f2067..6db82b68e2f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -252,6 +252,15 @@ void fastcall detach_pid(struct task_struct *task, enum pid_type type) free_pid(pid); } +/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ +void fastcall transfer_pid(struct task_struct *old, struct task_struct *new, + enum pid_type type) +{ + new->pids[type].pid = old->pids[type].pid; + hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); + old->pids[type].pid = NULL; +} + struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result = NULL; -- cgit v1.2.3-70-g09d2 From 65800ac77e080cf159d6c1207b6886e18f22bc08 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 27 Sep 2006 01:51:11 -0700 Subject: [PATCH] pid: remove temporary debug code in attach_pid With the patches flying between Oleg and myself somehow this temporary debug code got left in pid.c. It was never intended to make it to the stable kernel. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/pid.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/pid.c b/kernel/pid.c index 6db82b68e2f..8387e8c6819 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -223,9 +223,6 @@ int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) struct pid_link *link; struct pid *pid; - WARN_ON(!task->pid); /* to be removed soon */ - WARN_ON(!nr); /* to be removed soon */ - link = &task->pids[type]; link->pid = pid = find_pid(nr); hlist_add_head_rcu(&link->node, &pid->tasks[type]); -- cgit v1.2.3-70-g09d2