From 0e12f848b337fc034ceb3c0d03d75f8de1b8cc96 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: net: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- net/core/dev.c | 4 ++-- net/iucv/iucv.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 58296307787..ee61b987f4d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2239,7 +2239,7 @@ out: */ if (!cpus_empty(net_dma.channel_mask)) { int chan_idx; - for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { struct dma_chan *chan = net_dma.channels[chan_idx]; if (chan) dma_async_memcpy_issue_pending(chan); @@ -4300,7 +4300,7 @@ static void net_dma_rebalance(struct net_dma *net_dma) i = 0; cpu = first_cpu(cpu_online_map); - for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { chan = net_dma->channels[chan_idx]; n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 91897076213..8de51107059 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -497,7 +497,7 @@ static void iucv_setmask_up(void) /* Disable all cpu but the first in cpu_irq_cpumask. 
*/ cpumask = iucv_irq_cpumask; cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); - for_each_cpu_mask(cpu, cpumask) + for_each_cpu_mask_nr(cpu, cpumask) smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1); } -- cgit v1.2.3-70-g09d2 From 3f9b48a7584851997702cdc3f58e7811b5546397 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: net: Pass reference to cpumask variable in net/sunrpc/svc.c * Pass reference to cpumask variable instead of using stack. For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- net/sunrpc/svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 01c7e311b90..d43cf8ddff6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -603,7 +603,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, error = kernel_thread((int (*)(void *)) func, rqstp, 0); if (have_oldmask) - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); if (error < 0) goto out_thread; -- cgit v1.2.3-70-g09d2 From 65c011845316d3c1381f478ca0d8265c43b3b039 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:30 -0700 Subject: cpumask: Replace cpumask_of_cpu with cpumask_of_cpu_ptr * This patch replaces the dangerous lvalue version of cpumask_of_cpu with new cpumask_of_cpu_ptr macros. These are patterned after the node_to_cpumask_ptr macros. In general terms, if there is a cpumask_of_cpu_map[] then a pointer to the cpumask_of_cpu_map[cpu] entry is used. The cpumask_of_cpu_map is provided when there is a large NR_CPUS count, reducing greatly the amount of code generated and stack space used for cpumask_of_cpu(). 
The pointer to the cpumask_t value is needed for calling set_cpus_allowed_ptr() to reduce the amount of stack space needed to pass the cpumask_t value. If there isn't a cpumask_of_cpu_map[], then a temporary variable is declared and filled in with value from cpumask_of_cpu(cpu) as well as a pointer variable pointing to this temporary variable. Afterwards, the pointer is used to reference the cpumask value. The compiler will optimize out the extra dereference through the pointer as well as the stack space used for the pointer, resulting in identical code. A good example of the orthogonal usages is in net/sunrpc/svc.c: case SVC_POOL_PERCPU: { unsigned int cpu = m->pool_to[pidx]; cpumask_of_cpu_ptr(cpumask, cpu); *oldmask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask); return 1; } case SVC_POOL_PERNODE: { unsigned int node = m->pool_to[pidx]; node_to_cpumask_ptr(nodecpumask, node); *oldmask = current->cpus_allowed; set_cpus_allowed_ptr(current, nodecpumask); return 1; } Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/cstate.c | 3 ++- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 10 +++++--- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 +++++++---- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 9 ++++--- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 3 ++- arch/x86/kernel/cpu/intel_cacheinfo.c | 3 ++- arch/x86/kernel/microcode.c | 13 +++++++--- arch/x86/kernel/reboot.c | 14 +++++++---- drivers/acpi/processor_throttling.c | 11 +++++--- drivers/firmware/dcdbas.c | 3 ++- include/linux/cpumask.h | 32 ++++++++++++++++++++---- kernel/stop_machine.c | 3 ++- kernel/trace/trace_sysprof.c | 4 ++- net/sunrpc/svc.c | 3 ++- 14 files changed, 91 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index c2502eb9aa8..9220cf46aa1 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -73,6 +73,7 @@ int 
acpi_processor_ffh_cstate_probe(unsigned int cpu, struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; + cpumask_of_cpu_ptr(new_mask, cpu); int retval; unsigned int eax, ebx, ecx, edx; unsigned int edx_part; @@ -91,7 +92,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, new_mask); if (retval) return -1; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index dd097b83583..ff2fff56f0a 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -200,10 +200,12 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { cpumask_t saved_mask = current->cpus_allowed; + cpumask_of_cpu_ptr_declare(cpu_mask); unsigned int i; for_each_cpu_mask_nr(i, cmd->mask) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); + cpumask_of_cpu_ptr_next(cpu_mask, i); + set_cpus_allowed_ptr(current, cpu_mask); do_drv_write(cmd); } @@ -267,11 +269,12 @@ static unsigned int get_measured_perf(unsigned int cpu) } aperf_cur, mperf_cur; cpumask_t saved_mask; + cpumask_of_cpu_ptr(cpu_mask, cpu); unsigned int perf_percent; unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpu_mask); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -337,6 +340,7 @@ static unsigned int get_measured_perf(unsigned int cpu) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { + cpumask_of_cpu_ptr(cpu_mask, cpu); struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -349,7 +353,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) } cached_freq = 
data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); + freq = extract_freq(get_cur_val(cpu_mask), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c45ca6d4dce..53c7b693697 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -479,11 +479,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { cpumask_t oldmask; + cpumask_of_cpu_ptr(cpu_mask, cpu); u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpu_mask); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -1016,6 +1017,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { cpumask_t oldmask; + cpumask_of_cpu_ptr(cpu_mask, pol->cpu); struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1030,7 +1032,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + set_cpus_allowed_ptr(current, cpu_mask); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1105,6 +1107,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; + cpumask_of_cpu_ptr_declare(newmask); int rc; if (!cpu_online(pol->cpu)) @@ -1156,7 +1159,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy 
*pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + cpumask_of_cpu_ptr_next(newmask, pol->cpu); + set_cpus_allowed_ptr(current, newmask); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1178,7 +1182,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) - pol->cpus = cpumask_of_cpu(pol->cpu); + pol->cpus = *newmask; else pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); @@ -1244,6 +1248,7 @@ static unsigned int powernowk8_get (unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; + cpumask_of_cpu_ptr(newmask, cpu); unsigned int khz = 0; unsigned int first; @@ -1253,7 +1258,7 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 8b0dd6f2a1a..fd561bb26c6 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -313,9 +313,10 @@ static unsigned int get_cur_freq(unsigned int cpu) unsigned l, h; unsigned clock_freq; cpumask_t saved_mask; + cpumask_of_cpu_ptr(new_mask, cpu); saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, new_mask); if (smp_processor_id() != cpu) return 0; @@ -554,9 +555,11 @@ static int centrino_target (struct cpufreq_policy *policy, */ if (!cpus_empty(covered_cpus)) { + cpumask_of_cpu_ptr_declare(new_mask); + for_each_cpu_mask_nr(j, covered_cpus) { - 
set_cpus_allowed_ptr(current, - &cpumask_of_cpu(j)); + cpumask_of_cpu_ptr_next(new_mask, j); + set_cpus_allowed_ptr(current, new_mask); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 191f7263c61..2f3728dc24f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -244,7 +244,8 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(&cpumask_of_cpu(cpu)); + cpumask_of_cpu_ptr(newmask, cpu); + return _speedstep_get(newmask); } /** diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a7b0f8f1736..e4b8d189d7e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -516,6 +516,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) unsigned long j; int retval; cpumask_t oldmask; + cpumask_of_cpu_ptr(newmask, cpu); if (num_cache_leaves == 0) return -ENOENT; @@ -526,7 +527,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, newmask); if (retval) goto out; diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 56b933119a0..58520169e35 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -388,6 +388,7 @@ static int do_microcode_update (void) void *new_mc = NULL; int cpu; cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); old = current->cpus_allowed; @@ -404,7 +405,8 @@ static int do_microcode_update (void) if (!uci->valid) continue; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); error = get_maching_microcode(new_mc, cpu); if (error < 0) 
goto out; @@ -574,6 +576,7 @@ static int apply_microcode_check_cpu(int cpu) struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); unsigned int val[2]; int err = 0; @@ -582,7 +585,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -620,11 +623,12 @@ static int apply_microcode_check_cpu(int cpu) static void microcode_init_cpu(int cpu, int resume) { cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); mutex_lock(&microcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) @@ -656,11 +660,12 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) return -EINVAL; if (val == 1) { cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); mutex_lock(&microcode_mutex); if (uci->valid) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8a62160e15..214bbdfc851 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -403,24 +403,28 @@ void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP - int reboot_cpu_id; /* The boot cpu is always logical cpu 0 */ - reboot_cpu_id = 0; + int reboot_cpu_id = 0; + cpumask_of_cpu_ptr(newmask, reboot_cpu_id); #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_online(reboot_cpu)) + 
cpu_online(reboot_cpu)) { reboot_cpu_id = reboot_cpu; + cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); + } #endif /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) + if (!cpu_online(reboot_cpu_id)) { reboot_cpu_id = smp_processor_id(); + cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); + } /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); + set_cpus_allowed_ptr(current, newmask); /* O.K Now that I'm on the appropriate processor, * stop all of the others. diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index a56fc6c4394..a2c3f9cfa54 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -827,6 +827,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) static int acpi_processor_get_throttling(struct acpi_processor *pr) { cpumask_t saved_mask; + cpumask_of_cpu_ptr_declare(new_mask); int ret; if (!pr) @@ -838,7 +839,8 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr) * Migrate task to the cpu pointed by pr. */ saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); + cpumask_of_cpu_ptr_next(new_mask, pr->id); + set_cpus_allowed_ptr(current, new_mask); ret = pr->throttling.acpi_processor_get_throttling(pr); /* restore the previous state */ set_cpus_allowed_ptr(current, &saved_mask); @@ -987,6 +989,7 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { cpumask_t saved_mask; + cpumask_of_cpu_ptr_declare(new_mask); int ret = 0; unsigned int i; struct acpi_processor *match_pr; @@ -1025,7 +1028,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it can be called only for the cpu pointed by pr. 
*/ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); + cpumask_of_cpu_ptr_next(new_mask, pr->id); + set_cpus_allowed_ptr(current, new_mask); ret = p_throttling->acpi_processor_set_throttling(pr, t_state.target_state); } else { @@ -1056,7 +1060,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) continue; } t_state.cpu = i; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); + cpumask_of_cpu_ptr_next(new_mask, i); + set_cpus_allowed_ptr(current, new_mask); ret = match_pr->throttling. acpi_processor_set_throttling( match_pr, t_state.target_state); diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 25918f7dfd0..0b624e927a6 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -254,6 +254,7 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, static int smi_request(struct smi_cmd *smi_cmd) { cpumask_t old_mask; + cpumask_of_cpu_ptr(new_mask, 0); int ret = 0; if (smi_cmd->magic != SMI_CMD_MAGIC) { @@ -264,7 +265,7 @@ static int smi_request(struct smi_cmd *smi_cmd) /* SMI requires CPU 0 */ old_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); + set_cpus_allowed_ptr(current, new_mask); if (smp_processor_id() != 0) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", __func__); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 80226e77614..2dbd9a287e7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -62,6 +62,15 @@ * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set + *ifdef CONFIG_HAS_CPUMASK_OF_CPU + * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t *v + * cpumask_of_cpu_ptr_next(v, cpu) Sets v = &cpumask_of_cpu_map[cpu] + * cpumask_of_cpu_ptr(v, cpu) Combines above two operations + *else + * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t _v and *v = &_v + * 
cpumask_of_cpu_ptr_next(v, cpu) Sets _v = cpumask_of_cpu(cpu) + * cpumask_of_cpu_ptr(v, cpu) Combines above two operations + *endif * CPU_MASK_ALL Initializer - all bits set * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask @@ -236,11 +245,16 @@ static inline void __cpus_shift_left(cpumask_t *dstp, #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP extern cpumask_t *cpumask_of_cpu_map; -#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) - +#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) +#define cpumask_of_cpu_ptr(v, cpu) \ + const cpumask_t *v = &cpumask_of_cpu(cpu) +#define cpumask_of_cpu_ptr_declare(v) \ + const cpumask_t *v +#define cpumask_of_cpu_ptr_next(v, cpu) \ + v = &cpumask_of_cpu(cpu) #else #define cpumask_of_cpu(cpu) \ -(*({ \ +({ \ typeof(_unused_cpumask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL<<(cpu); \ @@ -248,8 +262,16 @@ extern cpumask_t *cpumask_of_cpu_map; cpus_clear(m); \ cpu_set((cpu), m); \ } \ - &m; \ -})) + m; \ +}) +#define cpumask_of_cpu_ptr(v, cpu) \ + cpumask_t _##v = cpumask_of_cpu(cpu); \ + const cpumask_t *v = &_##v +#define cpumask_of_cpu_ptr_declare(v) \ + cpumask_t _##v; \ + const cpumask_t *v = &_##v +#define cpumask_of_cpu_ptr_next(v, cpu) \ + _##v = cpumask_of_cpu(cpu) #endif #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index ba9b2054ecb..738b411ff2d 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -33,8 +33,9 @@ static int stopmachine(void *cpu) { int irqs_disabled = 0; int prepared = 0; + cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); - set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu)); + set_cpus_allowed_ptr(current, cpumask); /* Ack: we are alive */ smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. 
*/ diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 2301e1e7c60..63528086337 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -213,7 +213,9 @@ static void start_stack_timers(void) int cpu; for_each_online_cpu(cpu) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + cpumask_of_cpu_ptr(new_mask, cpu); + + set_cpus_allowed_ptr(current, new_mask); start_stack_timer(cpu); } set_cpus_allowed_ptr(current, &saved_mask); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d43cf8ddff6..083d1268813 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -314,9 +314,10 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) case SVC_POOL_PERCPU: { unsigned int cpu = m->pool_to[pidx]; + cpumask_of_cpu_ptr(cpumask, cpu); *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpumask); return 1; } case SVC_POOL_PERNODE: -- cgit v1.2.3-70-g09d2 From c3ee84163e5bc0dc2e1ccf1d3fc412debca73bab Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 21 Jul 2008 09:18:07 -0700 Subject: pkt_sched: Remove unused variable skb in dev_deactivate_queue function. Removed unused variable 'skb' in the dev_deactivate_queue function Signed-off-by: Daniel Lezcano Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0ddf69286f9..09dead33580 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -576,7 +576,6 @@ static void dev_deactivate_queue(struct net_device *dev, void *_qdisc_default) { struct Qdisc *qdisc_default = _qdisc_default; - struct sk_buff *skb = NULL; struct Qdisc *qdisc; qdisc = dev_queue->qdisc; @@ -588,8 +587,6 @@ static void dev_deactivate_queue(struct net_device *dev, spin_unlock_bh(qdisc_lock(qdisc)); } - - kfree_skb(skb); } static bool some_qdisc_is_running(struct net_device *dev, int lock) -- cgit v1.2.3-70-g09d2 From b6b2fed1f4802b8fcc9d7548a8f785225d38f9a3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 21 Jul 2008 09:48:06 -0700 Subject: net: Improve simple_tx_hash(). Based upon feedback from Eric Dumazet and Andi Kleen. Cure several deficiencies in simple_tx_hash() by using jhash + reciprocal multiply. 1) Eliminates expensive modulus operation. 2) Makes hash less attackable by using random seed. 3) Eliminates endianness hash distribution issues. Signed-off-by: David S. 
Miller --- net/core/dev.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2eed17bcb2d..7e2d5274333 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -124,6 +124,8 @@ #include #include #include +#include +#include #include "net-sysfs.h" @@ -1668,34 +1670,37 @@ out_kfree_skb: * --BLG */ +static u32 simple_tx_hashrnd; +static int simple_tx_hashrnd_initialized = 0; + static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) { - u32 *addr, *ports, hash, ihl; + u32 addr1, addr2, ports; + u32 hash, ihl; u8 ip_proto; - int alen; + + if (unlikely(!simple_tx_hashrnd_initialized)) { + get_random_bytes(&simple_tx_hashrnd, 4); + simple_tx_hashrnd_initialized = 1; + } switch (skb->protocol) { case __constant_htons(ETH_P_IP): ip_proto = ip_hdr(skb)->protocol; - addr = &ip_hdr(skb)->saddr; + addr1 = ip_hdr(skb)->saddr; + addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; - alen = 2; break; case __constant_htons(ETH_P_IPV6): ip_proto = ipv6_hdr(skb)->nexthdr; - addr = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; + addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; ihl = (40 >> 2); - alen = 8; break; default: return 0; } - ports = (u32 *) (skb_network_header(skb) + (ihl * 4)); - - hash = 0; - while (alen--) - hash ^= *addr++; switch (ip_proto) { case IPPROTO_TCP: @@ -1705,14 +1710,17 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) case IPPROTO_AH: case IPPROTO_SCTP: case IPPROTO_UDPLITE: - hash ^= *ports; + ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); break; default: + ports = 0; break; } - return hash % dev->real_num_tx_queues; + hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } static struct netdev_queue *dev_pick_tx(struct net_device *dev, -- cgit v1.2.3-70-g09d2 From 867d79fb9a4d5929ad8335c896fcfe11c3b2ef14 Mon 
Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 21 Jul 2008 09:54:18 -0700 Subject: net: In __netif_schedule() use WARN_ON instead of BUG_ON Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7e2d5274333..cbc34c0db37 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1327,7 +1327,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) void __netif_schedule(struct Qdisc *q) { - BUG_ON(q == &noop_qdisc); + if (WARN_ON_ONCE(q == &noop_qdisc)) + return; if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; -- cgit v1.2.3-70-g09d2 From d3678b463df73f5060d7420915080e19baeb379b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 21 Jul 2008 09:56:13 -0700 Subject: Revert "pkt_sched: Make default qdisc nonshared-multiqueue safe." This reverts commit a0c80b80e0fb48129e4e9d6a9ede914f9ff1850d. After discussions with Jamal and Herbert on netdev, we should provide at least minimal prioritization at the qdisc level even in multiqueue situations. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 99 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 09dead33580..cb625b4d6da 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -356,44 +356,99 @@ static struct Qdisc noqueue_qdisc = { }; -static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +static const u8 prio2band[TC_PRIO_MAX+1] = + { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; + +/* 3-band FIFO queue: old style, but should be a bit faster than + generic prio+fifo combination. 
+ */ + +#define PFIFO_FAST_BANDS 3 + +static inline struct sk_buff_head *prio2list(struct sk_buff *skb, + struct Qdisc *qdisc) +{ + struct sk_buff_head *list = qdisc_priv(qdisc); + return list + prio2band[skb->priority & TC_PRIO_MAX]; +} + +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { - struct sk_buff_head *list = &qdisc->q; + struct sk_buff_head *list = prio2list(skb, qdisc); - if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { + qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); + } return qdisc_drop(skb, qdisc); } -static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) { - struct sk_buff_head *list = &qdisc->q; + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); - if (!skb_queue_empty(list)) - return __qdisc_dequeue_head(qdisc, list); + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + if (!skb_queue_empty(list + prio)) { + qdisc->q.qlen--; + return __qdisc_dequeue_head(qdisc, list + prio); + } + } return NULL; } -static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) { - return __qdisc_requeue(skb, qdisc, &qdisc->q); + qdisc->q.qlen++; + return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); } -static void fifo_fast_reset(struct Qdisc* qdisc) +static void pfifo_fast_reset(struct Qdisc* qdisc) { - __qdisc_reset_queue(qdisc, &qdisc->q); + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) + __qdisc_reset_queue(qdisc, list + prio); + qdisc->qstats.backlog = 0; + qdisc->q.qlen = 0; } -static struct Qdisc_ops fifo_fast_ops __read_mostly = { - .id = "fifo_fast", - .priv_size = 0, - .enqueue = fifo_fast_enqueue, - .dequeue = fifo_fast_dequeue, - .requeue = fifo_fast_requeue, - .reset = fifo_fast_reset, +static int 
pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) +{ + struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; + + memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + return skb->len; + +nla_put_failure: + return -1; +} + +static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) +{ + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) + skb_queue_head_init(list + prio); + + return 0; +} + +static struct Qdisc_ops pfifo_fast_ops __read_mostly = { + .id = "pfifo_fast", + .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), + .enqueue = pfifo_fast_enqueue, + .dequeue = pfifo_fast_dequeue, + .requeue = pfifo_fast_requeue, + .init = pfifo_fast_init, + .reset = pfifo_fast_reset, + .dump = pfifo_fast_dump, .owner = THIS_MODULE, }; @@ -522,7 +577,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (dev->tx_queue_len) { qdisc = qdisc_create_dflt(dev, dev_queue, - &fifo_fast_ops, TC_H_ROOT); + &pfifo_fast_ops, TC_H_ROOT); if (!qdisc) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; @@ -550,9 +605,9 @@ void dev_activate(struct net_device *dev) int need_watchdog; /* No queueing discipline is attached to device; - * create default one i.e. fifo_fast for devices, - * which need queueing and noqueue_qdisc for - * virtual interfaces. + create default one i.e. pfifo_fast for devices, + which need queueing and noqueue_qdisc for + virtual interfaces */ if (dev_all_qdisc_sleeping_noop(dev)) -- cgit v1.2.3-70-g09d2 From 0dbff689c2f299e8f63911247925f2728d087688 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 21 Jul 2008 10:00:51 -0700 Subject: netfilter: nf_nat_core: eliminate useless find_appropriate_src for IP_NAT_RANGE_PROTO_RANDOM Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/nf_nat_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index d2a887fc8d9..6c6a3cba8d5 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -240,12 +240,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, This is only required for source (ie. NAT/masq) mappings. So far, we don't do local source mappings, so multiple manips not an issue. */ - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == IP_NAT_MANIP_SRC && + !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { if (find_appropriate_src(orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) - if (!nf_nat_used_tuple(tuple, ct)) - return; + if (!nf_nat_used_tuple(tuple, ct)) + return; } } -- cgit v1.2.3-70-g09d2 From 584015727a3b88b46602b20077b46cd04f8b4ab3 Mon Sep 17 00:00:00 2001 From: Krzysztof Piotr Oledzki Date: Mon, 21 Jul 2008 10:01:34 -0700 Subject: netfilter: accounting rework: ct_extend + 64bit counters (v4) Initially netfilter had 64bit counters for conntrack-based accounting, but it was changed in 2.6.14 to save memory. Unfortunately in-kernel 64bit counters are still required, for example for the "connbytes" extension. However, 64bit counters waste a lot of memory and it was not possible to enable/disable them at runtime. 
This patch: - reimplements accounting with respect to the extension infrastructure, - makes one global version of seq_print_acct() instead of two seq_print_counters(), - makes it possible to enable it at boot time (for CONFIG_SYSCTL/CONFIG_SYSFS=n), - makes it possible to enable/disable it at runtime by sysctl or sysfs, - extends counters from 32bit to 64bit, - renames ip_conntrack_counter -> nf_conn_counter, - enables accounting code unconditionally (no longer depends on CONFIG_NF_CT_ACCT), - set initial accounting enable state based on CONFIG_NF_CT_ACCT - removes buggy IPCT_COUNTER_FILLING event handling. If accounting is enabled newly created connections get additional acct extend. Old connections are not changed as it is not possible to add a ct_extend area to confirmed conntrack. Accounting is performed for all connections with acct extend regardless of a current state of "net.netfilter.nf_conntrack_acct". Signed-off-by: Krzysztof Piotr Oledzki Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- Documentation/feature-removal-schedule.txt | 10 ++ Documentation/kernel-parameters.txt | 7 ++ include/linux/netfilter/nf_conntrack_common.h | 8 +- include/linux/netfilter/nfnetlink_conntrack.h | 8 +- include/net/netfilter/nf_conntrack.h | 6 -- include/net/netfilter/nf_conntrack_acct.h | 51 ++++++++++ include/net/netfilter/nf_conntrack_extend.h | 2 + .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 18 +--- net/netfilter/Kconfig | 9 ++ net/netfilter/Makefile | 2 +- net/netfilter/nf_conntrack_acct.c | 104 +++++++++++++++++++++ net/netfilter/nf_conntrack_core.c | 39 +++++--- net/netfilter/nf_conntrack_netlink.c | 44 +++++---- net/netfilter/nf_conntrack_standalone.c | 18 +--- net/netfilter/xt_connbytes.c | 8 +- 15 files changed, 248 insertions(+), 86 deletions(-) create mode 100644 include/net/netfilter/nf_conntrack_acct.h create mode 100644 net/netfilter/nf_conntrack_acct.c (limited to 'net') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 86334b6f823..9f73587219e 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -336,3 +336,13 @@ When: After the only user (hal) has seen a release with the patches Why: Over 1K .text/.data size reduction, data is available in other ways (ioctls) Who: Johannes Berg + +--------------------------- + +What: CONFIG_NF_CT_ACCT +When: 2.6.29 +Why: Accounting can now be enabled/disabled without kernel recompilation. + Currently used only to set a default value for a feature that is also + controlled by a kernel/module/sysfs/sysctl parameter. +Who: Krzysztof Piotr Oledzki + diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 09ad7450647..e4ef2758440 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1279,6 +1279,13 @@ and is between 256 and 4096 characters. 
It is defined in the file This usage is only documented in each driver source file if at all. + nf_conntrack.acct= + [NETFILTER] Enable connection tracking flow accounting + 0 to disable accounting + 1 to enable accounting + Default value depends on CONFIG_NF_CT_ACCT that is + going to be removed in 2.6.29. + nfsaddrs= [NFS] See Documentation/filesystems/nfsroot.txt. diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index bad1eb760f6..885cbe28226 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -122,7 +122,7 @@ enum ip_conntrack_events IPCT_NATINFO_BIT = 10, IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), - /* Counter highest bit has been set */ + /* Counter highest bit has been set, unused */ IPCT_COUNTER_FILLING_BIT = 11, IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), @@ -145,12 +145,6 @@ enum ip_conntrack_expect_events { }; #ifdef __KERNEL__ -struct ip_conntrack_counter -{ - u_int32_t packets; - u_int32_t bytes; -}; - struct ip_conntrack_stat { unsigned int searched; diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 759bc043dc6..c19595c8930 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -115,10 +115,10 @@ enum ctattr_protoinfo_sctp { enum ctattr_counters { CTA_COUNTERS_UNSPEC, - CTA_COUNTERS_PACKETS, /* old 64bit counters */ - CTA_COUNTERS_BYTES, /* old 64bit counters */ - CTA_COUNTERS32_PACKETS, - CTA_COUNTERS32_BYTES, + CTA_COUNTERS_PACKETS, /* 64bit counters */ + CTA_COUNTERS_BYTES, /* 64bit counters */ + CTA_COUNTERS32_PACKETS, /* old 32bit counters, unused */ + CTA_COUNTERS32_BYTES, /* old 32bit counters, unused */ __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8f5b75734dd..0741ad592da 
100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -88,7 +88,6 @@ struct nf_conn_help { u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; }; - #include #include @@ -111,11 +110,6 @@ struct nf_conn /* Timer function; drops refcnt when it goes off. */ struct timer_list timeout; -#ifdef CONFIG_NF_CT_ACCT - /* Accounting Information (same cache line as other written members) */ - struct ip_conntrack_counter counters[IP_CT_DIR_MAX]; -#endif - #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h new file mode 100644 index 00000000000..5d5ae55d54c --- /dev/null +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -0,0 +1,51 @@ +/* + * (C) 2008 Krzysztof Piotr Oledzki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _NF_CONNTRACK_ACCT_H +#define _NF_CONNTRACK_ACCT_H +#include +#include +#include +#include + +struct nf_conn_counter { + u_int64_t packets; + u_int64_t bytes; +}; + +extern int nf_ct_acct; + +static inline +struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_ACCT); +} + +static inline +struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) +{ + struct nf_conn_counter *acct; + + if (!nf_ct_acct) + return NULL; + + acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp); + if (!acct) + pr_debug("failed to add accounting extension area"); + + + return acct; +}; + +extern unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir); + +extern int nf_conntrack_acct_init(void); +extern void nf_conntrack_acct_fini(void); + +#endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index f80c0ed6d87..da8ee52613a 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -7,11 +7,13 @@ enum nf_ct_ext_id { NF_CT_EXT_HELPER, NF_CT_EXT_NAT, + NF_CT_EXT_ACCT, NF_CT_EXT_NUM, }; #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat +#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter /* Extensions: optional stuff which isn't permanently in struct. 
*/ struct nf_ct_ext { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 40a46d48249..3a020720e40 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -18,19 +18,7 @@ #include #include #include - -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif +#include struct ct_iter_state { unsigned int bucket; @@ -127,7 +115,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -138,7 +126,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 316c7af1d2b..ee898e74808 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -49,6 +49,15 @@ config NF_CT_ACCT Those counters can be used for flow-based accounting or the `connbytes' match. + Please note that currently this option only sets a default state. + You may change it at boot time with nf_conntrack.acct=0/1 kernel + paramater or by loading the nf_conntrack module with acct=0/1. + + You may also disable/enable it on a running system with: + sysctl net.netfilter.nf_conntrack_acct=0/1 + + This option will be removed in 2.6.29. + If unsure, say `N'. 
config NF_CONNTRACK_MARK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5c4b183f642..3bd2cc556ae 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c new file mode 100644 index 00000000000..59bd8b903a1 --- /dev/null +++ b/net/netfilter/nf_conntrack_acct.c @@ -0,0 +1,104 @@ +/* Accouting handling for netfilter. */ + +/* + * (C) 2008 Krzysztof Piotr Oledzki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_NF_CT_ACCT +#define NF_CT_ACCT_DEFAULT 1 +#else +#define NF_CT_ACCT_DEFAULT 0 +#endif + +int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; +EXPORT_SYMBOL_GPL(nf_ct_acct); + +module_param_named(acct, nf_ct_acct, bool, 0644); +MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *acct_sysctl_header; +static struct ctl_table acct_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_acct", + .data = &nf_ct_acct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + {} +}; +#endif /* CONFIG_SYSCTL */ + +unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) +{ + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + return seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)acct[dir].packets, + (unsigned long long)acct[dir].bytes); +}; +EXPORT_SYMBOL_GPL(seq_print_acct); + +static struct nf_ct_ext_type acct_extend __read_mostly = { + .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), + .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .id = NF_CT_EXT_ACCT, +}; + +int nf_conntrack_acct_init(void) +{ + int ret; + +#ifdef CONFIG_NF_CT_ACCT + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. 
Plase use\n"); + printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); + printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); +#endif + + ret = nf_ct_extend_register(&acct_extend); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); + return ret; + } + +#ifdef CONFIG_SYSCTL + acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, + acct_sysctl_table); + + if (!acct_sysctl_header) { + nf_ct_extend_unregister(&acct_extend); + + printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + return -ENOMEM; + } +#endif + + return 0; +} + +void nf_conntrack_acct_fini(void) +{ +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(acct_sysctl_header); +#endif + nf_ct_extend_unregister(&acct_extend); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 28d03e64200..c519d090bdb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #define NF_CONNTRACK_VERSION "0.5.0" @@ -555,6 +556,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } + nf_ct_acct_ext_add(ct, GFP_ATOMIC); + spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(tuple); if (exp) { @@ -828,17 +831,16 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } acct: -#ifdef CONFIG_NF_CT_ACCT if (do_acct) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + struct nf_conn_counter *acct; - if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) - || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) - event |= IPCT_COUNTER_FILLING; + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } } -#endif spin_unlock_bh(&nf_conntrack_lock); @@ -853,15 +855,19 
@@ bool __nf_ct_kill_acct(struct nf_conn *ct, const struct sk_buff *skb, int do_acct) { -#ifdef CONFIG_NF_CT_ACCT if (do_acct) { + struct nf_conn_counter *acct; + spin_lock_bh(&nf_conntrack_lock); - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } spin_unlock_bh(&nf_conntrack_lock); } -#endif + if (del_timer(&ct->timeout)) { ct->timeout.function((unsigned long)ct); return true; @@ -1029,6 +1035,7 @@ void nf_conntrack_cleanup(void) nf_conntrack_proto_fini(); nf_conntrack_helper_fini(); nf_conntrack_expect_fini(); + nf_conntrack_acct_fini(); } struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) @@ -1168,6 +1175,10 @@ int __init nf_conntrack_init(void) if (ret < 0) goto out_fini_expect; + ret = nf_conntrack_acct_init(); + if (ret < 0) + goto out_fini_helper; + /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); @@ -1180,6 +1191,8 @@ int __init nf_conntrack_init(void) return ret; +out_fini_helper: + nf_conntrack_helper_fini(); out_fini_expect: nf_conntrack_expect_fini(); out_fini_proto: diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 95a7967731f..105a616c5c7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -206,22 +207,26 @@ nla_put_failure: return -1; } -#ifdef CONFIG_NF_CT_ACCT static int ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? 
CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nlattr *nest_count; + const struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; nest_count = nla_nest_start(skb, type | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; - NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS, - htonl(ct->counters[dir].packets)); - NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES, - htonl(ct->counters[dir].bytes)); + NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, + cpu_to_be64(acct[dir].packets)); + NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, + cpu_to_be64(acct[dir].bytes)); nla_nest_end(skb, nest_count); @@ -230,9 +235,6 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, nla_put_failure: return -1; } -#else -#define ctnetlink_dump_counters(a, b, c) (0) -#endif #ifdef CONFIG_NF_CONNTRACK_MARK static inline int @@ -501,11 +503,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, goto nla_put_failure; #endif - if (events & IPCT_COUNTER_FILLING && - (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) - goto nla_put_failure; - if (events & IPCT_RELATED && ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; @@ -576,11 +573,15 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } -#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) - memset(&ct->counters, 0, sizeof(ct->counters)); -#endif + IPCTNL_MSG_CT_GET_CTRZERO) { + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (acct) + memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); + } } if (cb->args[1]) { cb->args[1] = 0; @@ -832,14 +833,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nlh->nlmsg_flags & NLM_F_DUMP) { -#ifndef CONFIG_NF_CT_ACCT - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) - return -ENOTSUPP; -#endif + if (nlh->nlmsg_flags & NLM_F_DUMP) 
return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); - } if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); @@ -1152,6 +1148,8 @@ ctnetlink_create_conntrack(struct nlattr *cda[], goto err; } + nf_ct_acct_ext_add(ct, GFP_KERNEL); + #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46ea542d0df..869ef9349d0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -25,6 +25,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); @@ -38,19 +39,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(print_tuple); -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif - struct ct_iter_state { unsigned int bucket; }; @@ -146,7 +134,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -157,7 +145,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d7e8983cd37..3e39c4fe193 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -8,6 +8,7 @@ #include #include #include +#include 
MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); @@ -27,12 +28,15 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in, u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; - const struct ip_conntrack_counter *counters; + const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = ct->counters; + + counters = nf_conn_acct_find(ct); + if (!counters) + return false; switch (sinfo->what) { case XT_CONNBYTES_PKTS: -- cgit v1.2.3-70-g09d2 From 280763c053fee297d95b474f2c145990670371e6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 21 Jul 2008 10:02:12 -0700 Subject: netfilter: xt_time: fix time's time_mt()'s use of do_div() Fix netfilter xt_time's time_mt()'s use of do_div() on an s64 by using div_s64() instead. This was introduced by patch ee4411a1b1e0b679c99686629b5eab5a072ce49f ("[NETFILTER]: x_tables: add xt_time match"). Signed-off-by: David Howells Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ed76baab473..9f328593287 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -173,7 +173,7 @@ time_mt(const struct sk_buff *skb, const struct net_device *in, __net_timestamp((struct sk_buff *)skb); stamp = ktime_to_ns(skb->tstamp); - do_div(stamp, NSEC_PER_SEC); + stamp = div_s64(stamp, NSEC_PER_SEC); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ -- cgit v1.2.3-70-g09d2 From 72961ecf84d67d6359a1b30f9b2a8427f13e1e71 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 21 Jul 2008 10:02:35 -0700 Subject: netfilter: nfnetlink_log: send complete hardware header This patch adds some fields to NFLOG to be able to send the complete hardware header with all necessary informations. 
It sends to userspace: * the type of hardware link * the length of hardware header * the hardware header Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 3 +++ net/netfilter/nfnetlink_log.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'net') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a8572133292..f661731f3cb 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -48,6 +48,9 @@ enum nfulnl_attr_type { NFULA_SEQ, /* instance-local sequence number */ NFULA_SEQ_GLOBAL, /* global sequence number */ NFULA_GID, /* group id of socket */ + NFULA_HWTYPE, /* hardware type */ + NFULA_HWHEADER, /* hardware header */ + NFULA_HWLEN, /* hardware header length */ __NFULA_MAX }; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b8173af8c24..9a35b57ab76 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -453,6 +453,14 @@ __build_packet_message(struct nfulnl_instance *inst, } } + if (indev && skb_mac_header_was_set(skb)) { + NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)); + NLA_PUT_BE16(inst->skb, NFULA_HWLEN, + htons(skb->dev->hard_header_len)); + NLA_PUT(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, + skb_mac_header(skb)); + } + if (skb->tstamp.tv64) { struct nfulnl_msg_packet_timestamp ts; struct timeval tv = ktime_to_timeval(skb->tstamp); -- cgit v1.2.3-70-g09d2 From db1a75bdcc1766dc7e1fae9201ae287dcbcb6c66 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 21 Jul 2008 10:02:59 -0700 Subject: netfilter: xt_TCPMSS: collapse tcpmss_reverse_mtu{4,6} into one function Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/netfilter/xt_TCPMSS.c | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 217e2b68632..beb5094703c 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -147,17 +147,21 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph) +static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, + unsigned int family) { - struct flowi fl = { - .fl4_dst = iph->saddr, - }; + struct flowi fl = {}; const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; + if (family == PF_INET) + fl.fl4_dst = ip_hdr(skb)->saddr; + else + fl.fl6_dst = ipv6_hdr(skb)->saddr; + rcu_read_lock(); - ai = nf_get_afinfo(AF_INET); + ai = nf_get_afinfo(family); if (ai != NULL) ai->route((struct dst_entry **)&rt, &fl); rcu_read_unlock(); @@ -178,7 +182,8 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, __be16 newlen; int ret; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu4(iph), + ret = tcpmss_mangle_packet(skb, targinfo, + tcpmss_reverse_mtu(skb, PF_INET), iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -193,28 +198,6 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u_int32_t tcpmss_reverse_mtu6(const struct ipv6hdr *iph) -{ - struct flowi fl = { - .fl6_dst = iph->saddr, - }; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - rcu_read_lock(); - ai = nf_get_afinfo(AF_INET6); - if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); - } - return mtu; -} - static unsigned int tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, const struct net_device 
*out, unsigned int hooknum, @@ -229,7 +212,8 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) return NF_DROP; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu6(ipv6h), + ret = tcpmss_mangle_packet(skb, targinfo, + tcpmss_reverse_mtu(skb, PF_INET6), tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) -- cgit v1.2.3-70-g09d2 From c71529e42ce39c167dc53430cb8f3d5634af77df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Jul 2008 10:03:23 -0700 Subject: netfilter: nf_nat_sip: c= is optional for session According to RFC2327, the connection information is optional in the session description since it can be specified in the media description instead. My provider does exactly that and does not provide any connection information in the session description. As a result the new kernel drops all invite responses. This patch makes it optional as documented. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/nf_nat_sip.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4334d5cabc5..14544320c54 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -318,11 +318,11 @@ static int mangle_content_len(struct sk_buff *skb, buffer, buflen); } -static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, unsigned int *datalen, - enum sdp_header_types type, - enum sdp_header_types term, - char *buffer, int buflen) +static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, + unsigned int dataoff, unsigned int *datalen, + enum sdp_header_types type, + enum sdp_header_types term, + char *buffer, int buflen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -330,9 +330,9 @@ static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, &matchoff, &matchlen) <= 0) - return 0; + return -ENOENT; return mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, buflen); + buffer, buflen) ? 
0 : -EINVAL; } static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, @@ -346,8 +346,8 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, unsigned int buflen; buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, - buffer, buflen)) + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + buffer, buflen)) return 0; return mangle_content_len(skb, dptr, datalen); @@ -381,15 +381,27 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, - SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, - buffer, buflen)) + switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, + SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, + buffer, buflen)) { + case 0: + /* + * RFC 2327: + * + * Session description + * + * c=* (connection information - not required if included in all media) + */ + case -ENOENT: + break; + default: return 0; + } return mangle_content_len(skb, dptr, datalen); } -- cgit v1.2.3-70-g09d2 From 5547cd0ae8b46db9a084505239294eed9b8c8e2d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 21 Jul 2008 10:03:49 -0700 Subject: netfilter: nf_conntrack_sctp: fix sparse warnings Introduced by a258860e (netfilter: ctnetlink: add full support for SCTP to ctnetlink): net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: incorrect type in argument 1 (different base types) net/netfilter/nf_conntrack_proto_sctp.c:483:2: expected unsigned int [unsigned] [usertype] x net/netfilter/nf_conntrack_proto_sctp.c:483:2: got restricted 
unsigned int const net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: incorrect type in argument 1 (different base types) net/netfilter/nf_conntrack_proto_sctp.c:487:2: expected unsigned int [unsigned] [usertype] x net/netfilter/nf_conntrack_proto_sctp.c:487:2: got restricted unsigned int const net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type net/netfilter/nf_conntrack_proto_sctp.c:532:42: warning: incorrect type in assignment (different base types) net/netfilter/nf_conntrack_proto_sctp.c:532:42: expected restricted unsigned int net/netfilter/nf_conntrack_proto_sctp.c:532:42: got unsigned int net/netfilter/nf_conntrack_proto_sctp.c:534:39: warning: incorrect type in assignment (different base types) net/netfilter/nf_conntrack_proto_sctp.c:534:39: expected restricted unsigned int net/netfilter/nf_conntrack_proto_sctp.c:534:39: got unsigned int Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/nf_conntrack_proto_sctp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 41183a4d2d6..30aa5b94a77 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -482,11 +482,11 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, NLA_PUT_BE32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, - htonl(ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL])); + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]); NLA_PUT_BE32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY, - htonl(ct->proto.sctp.vtag[IP_CT_DIR_REPLY])); + ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); read_unlock_bh(&sctp_lock); @@ -530,9 +530,9 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) write_lock_bh(&sctp_lock); ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = - ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL])); + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = - ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])); + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); write_unlock_bh(&sctp_lock); return 0; -- cgit v1.2.3-70-g09d2 From 847499ce71bdcc8fc542062df6ebed3e596608dd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 21 Jul 2008 13:21:35 -0700 Subject: ipv6: use timer pending This fixes the bridge reference count problem and cleans up ipv6 FIB timer management. Don't use the expires field, because it is not a proper way to test whether the timer is pending; use timer_pending() instead. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4de2b9efcac..944095cf5e3 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -661,17 +661,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { - if (net->ipv6.ip6_fib_timer->expires == 0 && + if (!timer_pending(net->ipv6.ip6_fib_timer) && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(net->ipv6.ip6_fib_timer, + jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } void fib6_force_start_gc(struct net *net) { - if (net->ipv6.ip6_fib_timer->expires == 0) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); + if (!timer_pending(net->ipv6.ip6_fib_timer)) + mod_timer(net->ipv6.ip6_fib_timer, + jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } /* -- cgit v1.2.3-70-g09d2 From 7943986ca1138ac99597b1aa4dc893012dcfdc08 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 21 Jul 2008 13:28:44 -0700 Subject: net: use kcalloc in netdev_queue alloc Minor nit, use size_t for allocation size and kcalloc to allocate an array. Probably makes no actual code difference. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index cbc34c0db37..1698b399898 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4207,7 +4207,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, { struct netdev_queue *tx; struct net_device *dev; - int alloc_size; + size_t alloc_size; void *p; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -4227,7 +4227,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, return NULL; } - tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL); + tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); if (!tx) { printk(KERN_ERR "alloc_netdev: Unable to allocate " "tx qdiscs.\n"); -- cgit v1.2.3-70-g09d2 From 6579e57b31d79d31d9b806e41ba48774e73257dc Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 21 Jul 2008 13:31:48 -0700 Subject: net: Print the module name as part of the watchdog message As suggested by Dave: This patch adds a function to get the driver name from a struct net_device, and consequently uses this in the watchdog timeout handler to print as part of the message. Signed-off-by: Arjan van de Ven Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 20 ++++++++++++++++++++ net/sched/sch_generic.c | 6 +++--- 3 files changed, 25 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 812bcd8b436..f5ea445f89f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1645,6 +1645,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); +extern char *netdev_drivername(struct net_device *dev, char *buffer, int len); + extern void linkwatch_run_queue(void); extern int netdev_compute_features(unsigned long all, unsigned long one); diff --git a/net/core/dev.c b/net/core/dev.c index 1698b399898..ad5598d2bb3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4686,6 +4686,26 @@ err_name: return -ENOMEM; } +char *netdev_drivername(struct net_device *dev, char *buffer, int len) +{ + struct device_driver *driver; + struct device *parent; + + if (len <= 0 || !buffer) + return buffer; + buffer[0] = 0; + + parent = dev->dev.parent; + + if (!parent) + return buffer; + + driver = parent->driver; + if (driver && driver->name) + strlcpy(buffer, driver->name, len); + return buffer; +} + static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cb625b4d6da..4ac7e3a8c25 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -212,9 +212,9 @@ static void dev_watchdog(unsigned long arg) if (some_queue_stopped && time_after(jiffies, (dev->trans_start + dev->watchdog_timeo))) { - printk(KERN_INFO "NETDEV WATCHDOG: %s: " - "transmit timed out\n", - dev->name); + char drivername[64]; + printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", + dev->name, netdev_drivername(dev, drivername, 64)); dev->tx_timeout(dev); WARN_ON_ONCE(1); } 
-- cgit v1.2.3-70-g09d2 From 47112e25da41d9059626033986dc3353e101f815 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 21 Jul 2008 13:35:08 -0700 Subject: udplite: Protection against coverage value wrap-around This patch clamps the cscov setsockopt values to a maximum of 0xFFFF. Setsockopt values greater than 0xffff can cause an unwanted wrap-around. Further, IPv6 jumbograms are not supported (RFC 3828, 3.5), so that values greater than 0xffff are not even useful. Further changes: fixed a typo in the documentation. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- Documentation/networking/udplite.txt | 2 +- net/ipv4/udp.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt index 3870f280280..855d8da57a2 100644 --- a/Documentation/networking/udplite.txt +++ b/Documentation/networking/udplite.txt @@ -148,7 +148,7 @@ getsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, &value, ...); is meaningless (as in TCP). Packets with a zero checksum field are - illegal (cf. RFC 3828, sec. 3.1) will be silently discarded. + illegal (cf. RFC 3828, sec. 3.1) and will be silently discarded. 4) Fragmentation diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a751770947a..383d17359d0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1325,6 +1325,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcslen = val; up->pcflag |= UDPLITE_SEND_CC; break; @@ -1337,6 +1339,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. 
*/ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; -- cgit v1.2.3-70-g09d2 From b32d13102d39ed411d152a7ffcc5f66d5b3b1b49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 21 Jul 2008 18:45:34 -0700 Subject: tcp: Fix bitmask test in tcp_syn_options() As reported by Alexey Dobriyan: CHECK net/ipv4/tcp_output.c net/ipv4/tcp_output.c:475:7: warning: dubious: !x & y And sparse is damn right! if (unlikely(!OPTION_TS & opts->options)) ^^^ size += TCPOLEN_SACKPERM_ALIGNED; OPTION_TS is (1 << 1), so condition will never trigger. Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1fa683c0ba9..a00532de2a8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -472,7 +472,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; - if (unlikely(!OPTION_TS & opts->options)) + if (unlikely(!(OPTION_TS & opts->options))) size += TCPOLEN_SACKPERM_ALIGNED; } -- cgit v1.2.3-70-g09d2 From 16be63fd1670000b96b76cb55b6f1bead21b4c4b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 May 2008 11:50:16 -0700 Subject: bluetooth: remove improper bluetooth class symlinks. Don't create symlinks in a class to a device that is not owned by the class. If the bluetooth subsystem really wants to point to all of the devices it controls, it needs to create real devices, not fake symlinks. 
Cc: Maxim Krasnyansky Cc: Kay Sievers Acked-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sysfs.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 844ca5f1b2d..c85bf8f678d 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -398,10 +398,6 @@ int hci_register_sysfs(struct hci_dev *hdev) if (device_create_file(dev, bt_attrs[i]) < 0) BT_ERR("Failed to create device attribute"); - if (sysfs_create_link(&bt_class->subsys.kobj, - &dev->kobj, kobject_name(&dev->kobj)) < 0) - BT_ERR("Failed to create class symlink"); - return 0; } @@ -409,9 +405,6 @@ void hci_unregister_sysfs(struct hci_dev *hdev) { BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - sysfs_remove_link(&bt_class->subsys.kobj, - kobject_name(&hdev->dev.kobj)); - device_del(&hdev->dev); } -- cgit v1.2.3-70-g09d2 From d29f749e252bcdbfe7a75a58f0ee92da16f127c0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 22 Jul 2008 14:09:06 -0700 Subject: net: Fix build failure with 'make mandocs'. The function header comments have to go with the functions they are documenting, or things go horribly wrong when we try to process them with the docbook tools. Warning(include/linux/netdevice.h:1006): No description found for parameter 'dev_queue' Warning(include/linux/netdevice.h:1033): No description found for parameter 'dev_queue' Warning(include/linux/netdevice.h:1067): No description found for parameter 'dev_queue' Warning(include/linux/netdevice.h:1093): No description found for parameter 'dev_queue' Warning(include/linux/netdevice.h:1474): No description found for parameter 'txq' Error(net/core/dev.c:1674): cannot understand prototype: 'u32 simple_tx_hashrnd; ' Signed-off-by: Dave Jones Acked-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 58 +++++++++++++++++++++++------------------------ net/core/dev.c | 51 ++++++++++++++++++++--------------------- 2 files changed, 54 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f5ea445f89f..b4d056ceab9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -996,17 +996,17 @@ static inline void netif_tx_schedule_all(struct net_device *dev) netif_schedule_queue(netdev_get_tx_queue(dev, i)); } +static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) +{ + clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); +} + /** * netif_start_queue - allow transmit * @dev: network device * * Allow upper layers to call the device hard_start_xmit routine. */ -static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) -{ - clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); -} - static inline void netif_start_queue(struct net_device *dev) { netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); @@ -1022,13 +1022,6 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) } } -/** - * netif_wake_queue - restart transmit - * @dev: network device - * - * Allow upper layers to call the device hard_start_xmit routine. - * Used for flow control when transmit resources are available. - */ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) { #ifdef CONFIG_NETPOLL_TRAP @@ -1041,6 +1034,13 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) __netif_schedule(dev_queue->qdisc); } +/** + * netif_wake_queue - restart transmit + * @dev: network device + * + * Allow upper layers to call the device hard_start_xmit routine. + * Used for flow control when transmit resources are available. 
+ */ static inline void netif_wake_queue(struct net_device *dev) { netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); @@ -1056,6 +1056,11 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) } } +static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) +{ + set_bit(__QUEUE_STATE_XOFF, &dev_queue->state); +} + /** * netif_stop_queue - stop transmitted packets * @dev: network device @@ -1063,11 +1068,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) * Stop upper layers calling the device hard_start_xmit routine. * Used for flow control when transmit resources are unavailable. */ -static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) -{ - set_bit(__QUEUE_STATE_XOFF, &dev_queue->state); -} - static inline void netif_stop_queue(struct net_device *dev) { netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); @@ -1083,17 +1083,17 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev) } } +static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) +{ + return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state); +} + /** * netif_queue_stopped - test if transmit queue is flowblocked * @dev: network device * * Test if transmit queue on device is currently unable to send. 
*/ -static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) -{ - return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state); -} - static inline int netif_queue_stopped(const struct net_device *dev) { return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); @@ -1463,13 +1463,6 @@ static inline void netif_rx_complete(struct net_device *dev, local_irq_restore(flags); } -/** - * netif_tx_lock - grab network device transmit lock - * @dev: network device - * @cpu: cpu number of lock owner - * - * Get network device transmit lock - */ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); @@ -1482,6 +1475,13 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq) txq->xmit_lock_owner = smp_processor_id(); } +/** + * netif_tx_lock - grab network device transmit lock + * @dev: network device + * @cpu: cpu number of lock owner + * + * Get network device transmit lock + */ static inline void netif_tx_lock(struct net_device *dev) { int cpu = smp_processor_id(); diff --git a/net/core/dev.c b/net/core/dev.c index ad5598d2bb3..65eea83613e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1645,32 +1645,6 @@ out_kfree_skb: return 0; } -/** - * dev_queue_xmit - transmit a buffer - * @skb: buffer to transmit - * - * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling - * this function. The function can be called from an interrupt. - * - * A negative errno code is returned on a failure. A success does not - * guarantee the frame will be transmitted as it may be dropped due - * to congestion or traffic shaping. - * - * ----------------------------------------------------------------------------------- - * I notice this method can also return errors from the queue disciplines, - * including NET_XMIT_DROP, which is a positive value. So, errors can also - * be positive. 
- * - * Regardless of the return value, the skb is consumed, so it is currently - * difficult to retry a send to this method. (You can bump the ref count - * before sending to hold a reference for retry if you are careful.) - * - * When calling this method, interrupts MUST be enabled. This is because - * the BH enable code must have IRQs enabled so that it will not deadlock. - * --BLG - */ - static u32 simple_tx_hashrnd; static int simple_tx_hashrnd_initialized = 0; @@ -1738,6 +1712,31 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, return netdev_get_tx_queue(dev, queue_index); } +/** + * dev_queue_xmit - transmit a buffer + * @skb: buffer to transmit + * + * Queue a buffer for transmission to a network device. The caller must + * have set the device and priority and built the buffer before calling + * this function. The function can be called from an interrupt. + * + * A negative errno code is returned on a failure. A success does not + * guarantee the frame will be transmitted as it may be dropped due + * to congestion or traffic shaping. + * + * ----------------------------------------------------------------------------------- + * I notice this method can also return errors from the queue disciplines, + * including NET_XMIT_DROP, which is a positive value. So, errors can also + * be positive. + * + * Regardless of the return value, the skb is consumed, so it is currently + * difficult to retry a send to this method. (You can bump the ref count + * before sending to hold a reference for retry if you are careful.) + * + * When calling this method, interrupts MUST be enabled. This is because + * the BH enable code must have IRQs enabled so that it will not deadlock. + * --BLG + */ int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; -- cgit v1.2.3-70-g09d2 From cf508b1211dbe576778ff445ea1b4b0bcfa5c4ea Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 22 Jul 2008 14:16:42 -0700 Subject: netdev: Handle ->addr_list_lock just like ->_xmit_lock for lockdep. The new address list lock needs to handle the same device layering issues that the _xmit_lock one does. This integrates work done by Patrick McHardy. Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 +++ drivers/net/hamradio/bpqether.c | 2 ++ drivers/net/macvlan.c | 3 +++ drivers/net/wireless/hostap/hostap_hw.c | 3 +++ net/8021q/vlan_dev.c | 4 ++++ net/core/dev.c | 27 +++++++++++++++++++++------ net/netrom/af_netrom.c | 2 ++ net/rose/af_rose.c | 2 ++ 8 files changed, 40 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9737c06045d..a641eeaa2a2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5041,6 +5041,7 @@ static int bond_check_params(struct bond_params *params) } static struct lock_class_key bonding_netdev_xmit_lock_key; +static struct lock_class_key bonding_netdev_addr_lock_key; static void bond_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -5052,6 +5053,8 @@ static void bond_set_lockdep_class_one(struct net_device *dev, static void bond_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, + &bonding_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); } diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index b6500b2aacf..58f4b1d7bf1 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -123,6 +123,7 @@ static LIST_HEAD(bpq_devices); * off into a separate class since they always nest. 
*/ static struct lock_class_key bpq_netdev_xmit_lock_key; +static struct lock_class_key bpq_netdev_addr_lock_key; static void bpq_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -133,6 +134,7 @@ static void bpq_set_lockdep_class_one(struct net_device *dev, static void bpq_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL); } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index efbc15567dd..42394505bb5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -276,6 +276,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) * separate class since they always nest. */ static struct lock_class_key macvlan_netdev_xmit_lock_key; +static struct lock_class_key macvlan_netdev_addr_lock_key; #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ @@ -295,6 +296,8 @@ static void macvlan_set_lockdep_class_one(struct net_device *dev, static void macvlan_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, + &macvlan_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, macvlan_set_lockdep_class_one, NULL); } diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index 13d5882f1f2..3153fe9d7ce 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -3101,6 +3101,7 @@ static void prism2_clear_set_tim_queue(local_info_t *local) * This is a natural nesting, which needs a split lock type. 
*/ static struct lock_class_key hostap_netdev_xmit_lock_key; +static struct lock_class_key hostap_netdev_addr_lock_key; static void prism2_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -3112,6 +3113,8 @@ static void prism2_set_lockdep_class_one(struct net_device *dev, static void prism2_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, + &hostap_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL); } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f42bc2b26b8..4bf014e51f8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -569,6 +569,7 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) * separate class since they always nest. */ static struct lock_class_key vlan_netdev_xmit_lock_key; +static struct lock_class_key vlan_netdev_addr_lock_key; static void vlan_dev_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -581,6 +582,9 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev, static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) { + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &vlan_netdev_addr_lock_key, + subclass); netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); } diff --git a/net/core/dev.c b/net/core/dev.c index 65eea83613e..6bf217da9d8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -261,7 +261,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU(struct softnet_data, softnet_data); -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#ifdef CONFIG_LOCKDEP /* * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type @@ -301,6 +301,7 @@ static const char *netdev_lock_name[] = "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; static inline unsigned short netdev_lock_pos(unsigned short 
dev_type) { @@ -313,8 +314,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type) return ARRAY_SIZE(netdev_lock_type) - 1; } -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) { int i; @@ -322,9 +323,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock, lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], netdev_lock_name[i]); } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ + int i; + + i = netdev_lock_pos(dev->type); + lockdep_set_class_and_name(&dev->addr_list_lock, + &netdev_addr_lock_key[i], + netdev_lock_name[i]); +} #else -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) { } #endif @@ -3851,7 +3865,7 @@ static void __netdev_init_queue_locks_one(struct net_device *dev, void *_unused) { spin_lock_init(&dev_queue->_xmit_lock); - netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type); + netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); dev_queue->xmit_lock_owner = -1; } @@ -3896,6 +3910,7 @@ int register_netdevice(struct net_device *dev) net = dev_net(dev); spin_lock_init(&dev->addr_list_lock); + netdev_set_addr_lockdep_class(dev); netdev_init_queue_locks(dev); dev->iflink = -1; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fccc250f95f..532e4faa29f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -73,6 +73,7 @@ static const struct proto_ops nr_proto_ops; * separate class since they always nest. 
*/ static struct lock_class_key nr_netdev_xmit_lock_key; +static struct lock_class_key nr_netdev_addr_lock_key; static void nr_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -83,6 +84,7 @@ static void nr_set_lockdep_one(struct net_device *dev, static void nr_set_lockdep_key(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index dbc963b4f5f..a7f1ce11bc2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -74,6 +74,7 @@ ax25_address rose_callsign; * separate class since they always nest. */ static struct lock_class_key rose_netdev_xmit_lock_key; +static struct lock_class_key rose_netdev_addr_lock_key; static void rose_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -84,6 +85,7 @@ static void rose_set_lockdep_one(struct net_device *dev, static void rose_set_lockdep_key(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); } -- cgit v1.2.3-70-g09d2 From a94f779f9d82eb2d758a8715eaae5df98e8dcb21 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Jul 2008 14:20:11 -0700 Subject: pkt_sched: make qdisc_class_hash_alloc() static This patch makes the needlessly global qdisc_class_hash_alloc() static. Signed-off-by: Adrian Bunk Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 5219d5f9d75..b0601642e22 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -447,7 +447,7 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); -struct hlist_head *qdisc_class_hash_alloc(unsigned int n) +static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { unsigned int size = n * sizeof(struct hlist_head), i; struct hlist_head *h; -- cgit v1.2.3-70-g09d2 From abd0b198ea699578c3c3476d646c91842e19dbd2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Jul 2008 14:20:45 -0700 Subject: sctp: make sctp_outq_flush() static sctp_outq_flush() can now become static. Signed-off-by: Adrian Bunk Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - net/sctp/outqueue.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 70eb64a7e1a..535a18f57a1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1161,7 +1161,6 @@ void sctp_outq_init(struct sctp_association *, struct sctp_outq *); void sctp_outq_teardown(struct sctp_outq *); void sctp_outq_free(struct sctp_outq*); int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk); -int sctp_outq_flush(struct sctp_outq *, int); int sctp_outq_sack(struct sctp_outq *, struct sctp_sackhdr *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 70ead8dc348..4328ad5439c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -71,6 +71,8 @@ static void sctp_mark_missing(struct sctp_outq *q, static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); +static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout); + /* Add 
data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, struct sctp_chunk *ch) @@ -712,7 +714,7 @@ int sctp_outq_uncork(struct sctp_outq *q) * locking concerns must be made. Today we use the sock lock to protect * this function. */ -int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) +static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) { struct sctp_packet *packet; struct sctp_packet singleton; -- cgit v1.2.3-70-g09d2 From 4d6971e909e904be60218739fc961188471fc4f4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Jul 2008 14:21:30 -0700 Subject: sctp: remove sctp_assoc_proc_exit() Commit 20c2c1fd6c842caf70dcb1d94b9d58861949fd3d (sctp: add sctp/remaddr table to complete RFC remote address table OID) added an unused sctp_assoc_proc_exit() function that seems to have been unintentionally created when copying the assocs code. Signed-off-by: Adrian Bunk Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/proc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5dd89831ece..f268910620b 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -519,8 +519,3 @@ int __init sctp_remaddr_proc_init(void) return 0; } - -void sctp_assoc_proc_exit(void) -{ - remove_proc_entry("remaddr", proc_net_sctp); -} -- cgit v1.2.3-70-g09d2 From 888c848ed34bd5f8cb56567624c0d951ab35174e Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Jul 2008 14:21:58 -0700 Subject: ipv6: make struct ipv6_devconf static struct ipv6_devconf can now become static. Signed-off-by: Adrian Bunk Signed-off-by: David S. 
Miller --- include/net/if_inet6.h | 2 -- net/ipv6/addrconf.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index db66c792774..c8effa4b1fe 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -193,8 +193,6 @@ struct inet6_dev struct rcu_head rcu; }; -extern struct ipv6_devconf ipv6_devconf; - static inline void ipv6_eth_mc_map(struct in6_addr *addr, char *buf) { /* diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9f4fcce6379..74d543d504a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -153,7 +153,7 @@ static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -struct ipv6_devconf ipv6_devconf __read_mostly = { +static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, -- cgit v1.2.3-70-g09d2 From 417f28bb340725544c36b35465444d2fd57232b8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Jul 2008 14:33:45 -0700 Subject: netns: dont alloc ipv6 fib timer list FIB timer list is a trivial size structure, avoid indirection and just put it in existing ns. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/net/netns/ipv6.h | 2 +- net/ipv6/ip6_fib.c | 40 +++++++++++++--------------------------- 2 files changed, 14 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5bacd838e88..2932721180c 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -39,7 +39,7 @@ struct netns_ipv6 { #endif struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; - struct timer_list *ip6_fib_timer; + struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; struct fib6_table *fib6_main_tbl; struct dst_ops *ip6_dst_ops; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 944095cf5e3..e0922975c41 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -661,16 +661,16 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { - if (!timer_pending(net->ipv6.ip6_fib_timer) && + if (!timer_pending(&net->ipv6.ip6_fib_timer) && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(net->ipv6.ip6_fib_timer, + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } void fib6_force_start_gc(struct net *net) { - if (!timer_pending(net->ipv6.ip6_fib_timer)) - mod_timer(net->ipv6.ip6_fib_timer, + if (!timer_pending(&net->ipv6.ip6_fib_timer)) + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -1449,7 +1449,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) } else { local_bh_disable(); if (!spin_trylock(&fib6_gc_lock)) { - mod_timer(net->ipv6.ip6_fib_timer, jiffies + HZ); + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); local_bh_enable(); return; } @@ -1462,12 +1462,10 @@ void fib6_run_gc(unsigned long expires, struct net *net) fib6_clean_all(net, fib6_age, 0, NULL); if (gc_args.more) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + 
net->ipv6.sysctl.ip6_rt_gc_interval); - else { - del_timer(net->ipv6.ip6_fib_timer); - net->ipv6.ip6_fib_timer->expires = 0; - } + else + del_timer(&net->ipv6.ip6_fib_timer); spin_unlock_bh(&fib6_gc_lock); } @@ -1478,16 +1476,7 @@ static void fib6_gc_timer_cb(unsigned long arg) static int fib6_net_init(struct net *net) { - int ret; - struct timer_list *timer; - - ret = -ENOMEM; - timer = kzalloc(sizeof(*timer), GFP_KERNEL); - if (!timer) - goto out; - - setup_timer(timer, fib6_gc_timer_cb, (unsigned long)net); - net->ipv6.ip6_fib_timer = timer; + setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) @@ -1521,9 +1510,7 @@ static int fib6_net_init(struct net *net) #endif fib6_tables_init(net); - ret = 0; -out: - return ret; + return 0; #ifdef CONFIG_IPV6_MULTIPLE_TABLES out_fib6_main_tbl: @@ -1534,15 +1521,14 @@ out_fib_table_hash: out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: - kfree(timer); - goto out; + return -ENOMEM; } static void fib6_net_exit(struct net *net) { rt6_ifdown(net, NULL); - del_timer_sync(net->ipv6.ip6_fib_timer); - kfree(net->ipv6.ip6_fib_timer); + del_timer_sync(&net->ipv6.ip6_fib_timer); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.fib6_local_tbl); #endif -- cgit v1.2.3-70-g09d2 From c8a4522245e9931a53a98d5160bb4c00d3f73921 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Jul 2008 14:34:09 -0700 Subject: ipv6: use round_jiffies This timer normally happens once a minute, there is no need to cause an early wakeup for it, so align it to next second boundary to save power. It can't be deferred because then it could take too long on cleanup or DoS. Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- net/ipv6/ip6_fib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e0922975c41..03e23d058ec 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1462,8 +1462,9 @@ void fib6_run_gc(unsigned long expires, struct net *net) fib6_clean_all(net, fib6_age, 0, NULL); if (gc_args.more) - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(&net->ipv6.ip6_fib_timer, + round_jiffies(jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); spin_unlock_bh(&fib6_gc_lock); -- cgit v1.2.3-70-g09d2 From a76d7345a3f92bb8352f200e7b2e380dddcd7e36 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Jul 2008 14:34:35 -0700 Subject: ipv6: use spin_trylock_bh Now there is spin_trylock_bh, use it rather than open coding. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 03e23d058ec..0ec7f2b636f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1447,10 +1447,8 @@ void fib6_run_gc(unsigned long expires, struct net *net) gc_args.timeout = expires ? (int)expires : net->ipv6.sysctl.ip6_rt_gc_interval; } else { - local_bh_disable(); - if (!spin_trylock(&fib6_gc_lock)) { + if (!spin_trylock_bh(&fib6_gc_lock)) { mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - local_bh_enable(); return; } gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; -- cgit v1.2.3-70-g09d2 From 75307c0fe7fcb3b52a92fe32384fc33f50622654 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Jul 2008 14:35:07 -0700 Subject: ipv6: use kcalloc The fib_table_hash is an array, so use kcalloc. Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- net/ipv6/ip6_fib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0ec7f2b636f..c72fd2461ca 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1481,9 +1481,9 @@ static int fib6_net_init(struct net *net) if (!net->ipv6.rt6_stats) goto out_timer; - net->ipv6.fib_table_hash = - kzalloc(sizeof(*net->ipv6.fib_table_hash)*FIB_TABLE_HASHSZ, - GFP_KERNEL); + net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ, + sizeof(*net->ipv6.fib_table_hash), + GFP_KERNEL); if (!net->ipv6.fib_table_hash) goto out_rt6_stats; -- cgit v1.2.3-70-g09d2 From 3d0f24a74e7957593a5622eb5c04ed6860dd8391 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Jul 2008 14:35:50 -0700 Subject: ipv6: icmp6_dst_gc return change Change icmp6_dst_gc to return the one value the caller cares about rather than using call by reference. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/ip6_fib.c | 3 +-- net/ipv6/route.c | 10 ++++------ 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9313491e3da..2f8b3c06a10 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -68,7 +68,7 @@ extern struct rt6_info *rt6_lookup(struct net *net, extern struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *addr); -extern int icmp6_dst_gc(int *more); +extern int icmp6_dst_gc(void); extern void fib6_force_start_gc(struct net *net); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c72fd2461ca..08ea2de28d6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1453,9 +1453,8 @@ void fib6_run_gc(unsigned long expires, struct net *net) } gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; } - gc_args.more = 0; - icmp6_dst_gc(&gc_args.more); + gc_args.more = icmp6_dst_gc(); 
fib6_clean_all(net, fib6_age, 0, NULL); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 615b328de25..86540b24b27 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -978,13 +978,12 @@ out: return &rt->u.dst; } -int icmp6_dst_gc(int *more) +int icmp6_dst_gc(void) { struct dst_entry *dst, *next, **pprev; - int freed; + int more = 0; next = NULL; - freed = 0; spin_lock_bh(&icmp6_dst_lock); pprev = &icmp6_dst_gc_list; @@ -993,16 +992,15 @@ int icmp6_dst_gc(int *more) if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; dst_free(dst); - freed++; } else { pprev = &dst->next; - (*more)++; + ++more; } } spin_unlock_bh(&icmp6_dst_lock); - return freed; + return more; } static int ip6_dst_gc(struct dst_ops *ops) -- cgit v1.2.3-70-g09d2 From 16a37acaaf4aaa631ba3f83710ed6cdb1a597520 Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Tue, 22 Jul 2008 17:30:57 -0700 Subject: I/OAT: tcp_dma_copybreak default value dependent on I/OAT version I/OAT DMA performance tuning showed different optimal values of tcp_dma_copybreak for different I/OAT versions (4096 for 1.2 and 2048 for 2.0). This patch lets ioatdma driver set tcp_dma_copybreak value according to these results. 
[dan.j.williams@intel.com: remove some ifdefs] Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat_dma.c | 2 ++ drivers/dma/ioatdma.h | 15 +++++++++++++++ net/core/user_dma.c | 1 + 3 files changed, 18 insertions(+) (limited to 'net') diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index da572968a7d..ece5a0e3a33 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -1581,6 +1581,8 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, if (err) goto err_self_test; + ioat_set_tcp_copy_break(device); + dma_async_device_register(&device->common); INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index c6ec933f989..685adb62aa5 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -27,6 +27,7 @@ #include #include #include +#include #define IOAT_DMA_VERSION "2.18" @@ -129,6 +130,20 @@ struct ioat_desc_sw { struct dma_async_tx_descriptor async_tx; }; +static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) +{ + #ifdef CONFIG_NET_DMA + switch (dev->version) { + case IOAT_VER_1_2: + sysctl_tcp_dma_copybreak = 4096; + break; + case IOAT_VER_2_0: + sysctl_tcp_dma_copybreak = 2048; + break; + } + #endif +} + #if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase); diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 0ad1cd57bc3..de760504f6f 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -34,6 +34,7 @@ #define NET_DMA_DEFAULT_COPYBREAK 4096 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak); /** * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. -- cgit v1.2.3-70-g09d2 From 5b3ab1dbd401b36ba2f9bfee2d2dae252fd62cd8 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 23 Jul 2008 14:01:29 -0700 Subject: netdev: Remove warning from __netif_schedule(). It isn't helping anything and we aren't going to be able to change all the drivers that do queue wakeups in strange situations. Just letting a noop_qdisc get scheduled will work because when qdisc_run() executes via net_tx_work() it will simply find no packets pending when it makes the ->dequeue() call in qdisc_restart. Signed-off-by: David S. Miller --- net/core/dev.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6bf217da9d8..ccf97f9f37e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1341,9 +1341,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) void __netif_schedule(struct Qdisc *q) { - if (WARN_ON_ONCE(q == &noop_qdisc)) - return; - if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; unsigned long flags; -- cgit v1.2.3-70-g09d2 From b4942af65028c5eb516fdd9053020ccb2ee186ce Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 23 Jul 2008 14:06:04 -0700 Subject: net: Update entry in af_family_clock_key_strings In the merge phase of the CAN subsystem the af_family_clock_key_strings[] have been added to sock.c in commit 443aef0eddfa44c158d1b94ebb431a70638fcab4 (lockdep: fixup sk_callback_lock annotation). This trivial patch adds the missing name for address family 29 (AF_CAN). Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 10a64d57078..91f8bbc9352 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -180,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-29" , + "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_MAX" }; -- cgit v1.2.3-70-g09d2 From 4b53fb67e385b856a991d402096379dab462170a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Jul 2008 16:38:45 -0700 Subject: tcp: Clear probes_out more aggressively in tcp_ack(). This is based upon an excellent bug report from Eric Dumazet. tcp_ack() should clear ->icsk_probes_out even if there are packets outstanding. Otherwise if we get a sequence of ACKs while we do have packets outstanding over and over again, we'll never clear the probes_out value and eventually think the connection is too sick and we'll reset it. This appears to be some "optimization" added to tcp_ack() in the 2.4.x timeframe. In 2.2.x, probes_out is pretty much always cleared by tcp_ack(). Here is Eric's original report: ---------------------------------------- Apparently, we can in some situations reset TCP connections in a couple of seconds when some frames are lost. 
In order to reproduce the problem, please try the following program on linux-2.6.25.* Setup some iptables rules to allow two frames per second sent on loopback interface to tcp destination port 12000 iptables -N SLOWLO iptables -A SLOWLO -m hashlimit --hashlimit 2 --hashlimit-burst 1 --hashlimit-mode dstip --hashlimit-name slow2 -j ACCEPT iptables -A SLOWLO -j DROP iptables -A OUTPUT -o lo -p tcp --dport 12000 -j SLOWLO Then run the attached program and see the output : # ./loop State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,1) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,3) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,5) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,7) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,9) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,11) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,201ms,13) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,188ms,15) write(): Connection timed out wrote 890 bytes but was interrupted after 9 seconds ESTAB 0 0 127.0.0.1:12000 127.0.0.1:54455 Exiting read() because no data available (4000 ms timeout). 
read 860 bytes While this tcp session makes progress (sending frames with 50 bytes of payload, every 500ms), linux tcp stack decides to reset it, when tcp_retries 2 is reached (default value : 15) tcpdump : 15:30:28.856695 IP 127.0.0.1.56554 > 127.0.0.1.12000: S 33788768:33788768(0) win 32792 15:30:28.856711 IP 127.0.0.1.12000 > 127.0.0.1.56554: S 33899253:33899253(0) ack 33788769 win 32792 15:30:29.356947 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 1:61(60) ack 1 win 257 15:30:29.356966 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 61 win 257 15:30:29.866415 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 61:111(50) ack 1 win 257 15:30:29.866427 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 111 win 257 15:30:30.366516 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 111:161(50) ack 1 win 257 15:30:30.366527 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 161 win 257 15:30:30.876196 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 161:211(50) ack 1 win 257 15:30:30.876207 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 211 win 257 15:30:31.376282 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 211:261(50) ack 1 win 257 15:30:31.376290 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 261 win 257 15:30:31.885619 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 261:311(50) ack 1 win 257 15:30:31.885631 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 311 win 257 15:30:32.385705 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 311:361(50) ack 1 win 257 15:30:32.385715 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 361 win 257 15:30:32.895249 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 361:411(50) ack 1 win 257 15:30:32.895266 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 411 win 257 15:30:33.395341 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 411:461(50) ack 1 win 257 15:30:33.395351 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 461 win 257 15:30:33.918085 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 461:511(50) ack 1 win 257 15:30:33.918096 IP 127.0.0.1.12000 > 127.0.0.1.56554: . 
ack 511 win 257 15:30:34.418163 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 511:561(50) ack 1 win 257 15:30:34.418172 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 561 win 257 15:30:34.927685 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 561:611(50) ack 1 win 257 15:30:34.927698 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 611 win 257 15:30:35.427757 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 611:661(50) ack 1 win 257 15:30:35.427766 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 661 win 257 15:30:35.937359 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 661:711(50) ack 1 win 257 15:30:35.937376 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 711 win 257 15:30:36.437451 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 711:761(50) ack 1 win 257 15:30:36.437464 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 761 win 257 15:30:36.947022 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 761:811(50) ack 1 win 257 15:30:36.947039 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 811 win 257 15:30:37.447135 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 811:861(50) ack 1 win 257 15:30:37.447203 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 861 win 257 15:30:41.448171 IP 127.0.0.1.12000 > 127.0.0.1.56554: F 1:1(0) ack 861 win 257 15:30:41.448189 IP 127.0.0.1.56554 > 127.0.0.1.12000: R 33789629:33789629(0) win 0 Source of program : /* * small producer/consumer program. 
* setup a listener on 127.0.0.1:12000 * Forks a child * child connect to 127.0.0.1, and sends 10 bytes on this tcp socket every 100 ms * Father accepts connection, and read all data */ #include #include #include #include #include #include #include int port = 12000; char buffer[4096]; int main(int argc, char *argv[]) { int lfd = socket(AF_INET, SOCK_STREAM, 0); struct sockaddr_in socket_address; time_t t0, t1; int on = 1, sfd, res; unsigned long total = 0; socklen_t alen = sizeof(socket_address); pid_t pid; time(&t0); socket_address.sin_family = AF_INET; socket_address.sin_port = htons(port); socket_address.sin_addr.s_addr = htonl(INADDR_LOOPBACK); if (lfd == -1) { perror("socket()"); return 1; } setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)); if (bind(lfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) { perror("bind"); close(lfd); return 1; } if (listen(lfd, 1) == -1) { perror("listen()"); close(lfd); return 1; } pid = fork(); if (pid == 0) { int i, cfd = socket(AF_INET, SOCK_STREAM, 0); close(lfd); if (connect(cfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) { perror("connect()"); return 1; } for (i = 0 ; ;) { res = write(cfd, "blablabla\n", 10); if (res > 0) total += res; else if (res == -1) { perror("write()"); break; } else break; usleep(100000); if (++i == 10) { system("ss -on dst 127.0.0.1:12000"); i = 0; } } time(&t1); fprintf(stderr, "wrote %lu bytes but was interrupted after %g seconds\n", total, difftime(t1, t0)); system("ss -on | grep 127.0.0.1:12000"); close(cfd); return 0; } sfd = accept(lfd, (struct sockaddr *)&socket_address, &alen); if (sfd == -1) { perror("accept"); return 1; } close(lfd); while (1) { struct pollfd pfd[1]; pfd[0].fd = sfd; pfd[0].events = POLLIN; if (poll(pfd, 1, 4000) == 0) { fprintf(stderr, "Exiting read() because no data available (4000 ms timeout).\n"); break; } res = read(sfd, buffer, sizeof(buffer)); if (res > 0) total += res; else if (res == 0) break; else 
perror("read()"); } fprintf(stderr, "read %lu bytes\n", total); close(sfd); return 0; } ---------------------------------------- Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1f5e6049883..75efd244f2a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3292,6 +3292,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) * log. Something worked... */ sk->sk_err_soft = 0; + icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_time_stamp; prior_packets = tp->packets_out; if (!prior_packets) @@ -3324,8 +3325,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - icsk->icsk_probes_out = 0; - /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. -- cgit v1.2.3-70-g09d2 From 70eed75d76635ba7350651b9bd96529a306ec67a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 23 Jul 2008 16:42:42 -0700 Subject: netfilter: make security table depend on NETFILTER_ADVANCED Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 2 +- net/ipv6/netfilter/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f23e60c93ef..90eb7cb47e7 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -369,7 +369,7 @@ config IP_NF_SECURITY tristate "Security table" depends on IP_NF_IPTABLES depends on SECURITY - default m if NETFILTER_ADVANCED=n + depends on NETFILTER_ADVANCED help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. 
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 689dec899c5..0cfcce7b18d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -213,7 +213,7 @@ config IP6_NF_SECURITY tristate "Security table" depends on IP6_NF_IPTABLES depends on SECURITY - default m if NETFILTER_ADVANCED=n + depends on NETFILTER_ADVANCED help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. -- cgit v1.2.3-70-g09d2 From f867e6af94239a04ec23aeec2fcda5aa58e41db7 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 23 Jul 2008 21:34:27 -0700 Subject: pkt_sched: sch_sfq: dump a real number of flows Dump the "flows" number according to the number of active flows instead of repeating the "limit". Reported-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8589da66656..73f53844ce9 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -536,7 +536,14 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.limit = q->limit; opt.divisor = SFQ_HASH_DIVISOR; - opt.flows = q->limit; + opt.flows = 0; + if (q->tail != SFQ_DEPTH) { + unsigned int i; + + for (i = 0; i < SFQ_HASH_DIVISOR; i++) + if (q->ht[i] != SFQ_DEPTH) + opt.flows++; + } NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); -- cgit v1.2.3-70-g09d2 From a677a039be7243357d93502bff2b40850c942e2d Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 23 Jul 2008 21:29:17 -0700 Subject: flag parameters: socket and socketpair This patch adds support for flag values which are ORed to the type passed to socket and socketpair. The additional code is minimal. The flag values in this implementation can and must match the O_* flags. This avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameters and all callers are changed. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include #include #include #include #include #define PORT 57392 /* For Linux these must be the same. */ #define SOCK_CLOEXEC O_CLOEXEC int main (void) { int fd; fd = socket (PF_INET, SOCK_STREAM, 0); if (fd == -1) { puts ("socket(0) failed"); return 1; } int coe = fcntl (fd, F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if (coe & FD_CLOEXEC) { puts ("socket(0) set close-on-exec flag"); return 1; } close (fd); fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0); if (fd == -1) { puts ("socket(SOCK_CLOEXEC) failed"); return 1; } coe = fcntl (fd, F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if ((coe & FD_CLOEXEC) == 0) { puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag"); return 1; } close (fd); int fds[2]; if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1) { puts ("socketpair(0) failed"); return 1; } for (int i = 0; i < 2; ++i) { coe = fcntl (fds[i], F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if (coe & FD_CLOEXEC) { printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i); return 1; } close (fds[i]); } if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1) { puts ("socketpair(SOCK_CLOEXEC) failed"); return 1; } for (int i = 0; i < 2; ++i) { coe = fcntl (fds[i], F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if ((coe & FD_CLOEXEC) == 0) { printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i); return 1; } close (fds[i]); } puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Ulrich Drepper Acked-by: Davide Libenzi Cc: Michael Kerrisk Cc: "David S. 
Miller" Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-mips/socket.h | 7 +++++++ include/linux/net.h | 9 ++++++++- net/9p/trans_fd.c | 2 +- net/sctp/socket.c | 2 +- net/socket.c | 28 ++++++++++++++++++++-------- 5 files changed, 37 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 63f60254d30..facc2d7a87c 100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -102,6 +102,13 @@ enum sock_type { }; #define SOCK_MAX (SOCK_PACKET + 1) +/* Mask which covers at least up to SOCK_MASK-1. The + * * remaining bits are used as flags. */ +#define SOCK_TYPE_MASK 0xf + +/* Flags for socket, socketpair, paccept */ +#define SOCK_CLOEXEC O_CLOEXEC +#define SOCK_NONBLOCK O_NONBLOCK #define ARCH_HAS_SOCKET_TYPES 1 diff --git a/include/linux/net.h b/include/linux/net.h index 150a48c68d5..8b5383c45b4 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -20,6 +20,7 @@ #include #include +#include /* For O_CLOEXEC */ #include struct poll_table_struct; @@ -94,6 +95,12 @@ enum sock_type { }; #define SOCK_MAX (SOCK_PACKET + 1) +/* Mask which covers at least up to SOCK_MASK-1. The + * remaining bits are used as flags. 
*/ +#define SOCK_TYPE_MASK 0xf + +/* Flags for socket, socketpair, paccept */ +#define SOCK_CLOEXEC O_CLOEXEC #endif /* ARCH_HAS_SOCKET_TYPES */ @@ -208,7 +215,7 @@ extern int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); -extern int sock_map_fd(struct socket *sock); +extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 4507f744f44..cdf137af7ad 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1285,7 +1285,7 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) int fd, ret; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket); + fd = sock_map_fd(csocket, 0); if (fd < 0) { P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); return fd; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 79bece16aed..dbb79adf8f3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3910,7 +3910,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = sock_map_fd(newsock); + retval = sock_map_fd(newsock, 0); if (retval < 0) { sock_release(newsock); goto out; diff --git a/net/socket.c b/net/socket.c index 1ba57d88898..64601f90035 100644 --- a/net/socket.c +++ b/net/socket.c @@ -349,11 +349,11 @@ static struct dentry_operations sockfs_dentry_operations = { * but we take care of internal coherence yet. 
*/ -static int sock_alloc_fd(struct file **filep) +static int sock_alloc_fd(struct file **filep, int flags) { int fd; - fd = get_unused_fd(); + fd = get_unused_fd_flags(flags); if (likely(fd >= 0)) { struct file *file = get_empty_filp(); @@ -396,10 +396,10 @@ static int sock_attach_fd(struct socket *sock, struct file *file) return 0; } -int sock_map_fd(struct socket *sock) +int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_fd(&newfile); + int fd = sock_alloc_fd(&newfile, flags); if (likely(fd >= 0)) { int err = sock_attach_fd(sock, newfile); @@ -1218,12 +1218,18 @@ asmlinkage long sys_socket(int family, int type, int protocol) { int retval; struct socket *sock; + int flags; + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~SOCK_CLOEXEC) + return -EINVAL; + type &= SOCK_TYPE_MASK; retval = sock_create(family, type, protocol, &sock); if (retval < 0) goto out; - retval = sock_map_fd(sock); + retval = sock_map_fd(sock, flags & O_CLOEXEC); if (retval < 0) goto out_release; @@ -1246,6 +1252,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, struct socket *sock1, *sock2; int fd1, fd2, err; struct file *newfile1, *newfile2; + int flags; + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~SOCK_CLOEXEC) + return -EINVAL; + type &= SOCK_TYPE_MASK; /* * Obtain the first socket and check if the underlying protocol @@ -1264,13 +1276,13 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_fd(&newfile1); + fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - fd2 = sock_alloc_fd(&newfile2); + fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); if (unlikely(fd2 < 0)) { err = fd2; put_filp(newfile1); @@ -1426,7 +1438,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_fd(&newfile); + newfd = 
sock_alloc_fd(&newfile, 0); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); -- cgit v1.2.3-70-g09d2 From aaca0bdca573f3f51ea03139f9c7289541e7bca3 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 23 Jul 2008 21:29:20 -0700 Subject: flag parameters: paccept This patch is by far the most complex in the series. It adds a new syscall paccept. This syscall differs from accept in that it adds (at the userlevel) two additional parameters: - a signal mask - a flags value The flags parameter can be used to set flags like SOCK_CLOEXEC. This is implemented here as well. Some people argued that this is a property which should be inherited from the file descriptor for the server but this is against POSIX. Additionally, we really want the signal mask parameter as well (similar to pselect, ppoll, etc). So an interface change is inevitable. The flag value is the same as for socket and socketpair. I think diverging here will only create confusion. Similar to the filesystem interfaces where the use of the O_* constants differs, it is acceptable here. The signal mask is handled as for pselect etc. The mask is temporarily installed for the thread and removed before the call returns. I modeled the code after pselect. If there is a problem it's likely also in pselect. For architectures which use socketcall I maintained this interface instead of adding a system call. The symmetry shouldn't be broken. The following test must be adjusted for architectures other than x86 and x86-64 and in case the syscall numbers changed. 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include #include #include #include #include #include #include #include #include #ifndef __NR_paccept # ifdef __x86_64__ # define __NR_paccept 288 # elif defined __i386__ # define SYS_PACCEPT 18 # define USE_SOCKETCALL 1 # else # error "need __NR_paccept" # endif #endif #ifdef USE_SOCKETCALL # define paccept(fd, addr, addrlen, mask, flags) \ ({ long args[6] = { \ (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \ syscall (__NR_socketcall, SYS_PACCEPT, args); }) #else # define paccept(fd, addr, addrlen, mask, flags) \ syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags) #endif #define PORT 57392 #define SOCK_CLOEXEC O_CLOEXEC static pthread_barrier_t b; static void * tf (void *arg) { pthread_barrier_wait (&b); int s = socket (AF_INET, SOCK_STREAM, 0); struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); s = socket (AF_INET, SOCK_STREAM, 0); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); pthread_barrier_wait (&b); sleep (2); pthread_kill ((pthread_t) arg, SIGUSR1); return NULL; } static void handler (int s) { } int main (void) { pthread_barrier_init (&b, NULL, 2); struct sockaddr_in sin; pthread_t th; if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0) { puts ("pthread_create failed"); return 1; } int s = socket (AF_INET, SOCK_STREAM, 0); int reuse = 1; setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); bind (s, (struct sockaddr *) &sin, sizeof (sin)); listen (s, SOMAXCONN); pthread_barrier_wait (&b); int s2 = paccept (s, NULL, 0, NULL, 0); if (s2 < 0) { puts ("paccept(0) failed"); return 1; } 
int coe = fcntl (s2, F_GETFD); if (coe & FD_CLOEXEC) { puts ("paccept(0) set close-on-exec-flag"); return 1; } close (s2); pthread_barrier_wait (&b); s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC); if (s2 < 0) { puts ("paccept(SOCK_CLOEXEC) failed"); return 1; } coe = fcntl (s2, F_GETFD); if ((coe & FD_CLOEXEC) == 0) { puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag"); return 1; } close (s2); pthread_barrier_wait (&b); struct sigaction sa; sa.sa_handler = handler; sa.sa_flags = 0; sigemptyset (&sa.sa_mask); sigaction (SIGUSR1, &sa, NULL); sigset_t ss; pthread_sigmask (SIG_SETMASK, NULL, &ss); sigaddset (&ss, SIGUSR1); pthread_sigmask (SIG_SETMASK, &ss, NULL); sigdelset (&ss, SIGUSR1); alarm (4); pthread_barrier_wait (&b); errno = 0 ; s2 = paccept (s, NULL, 0, &ss, 0); if (s2 != -1 || errno != EINTR) { puts ("paccept did not fail with EINTR"); return 1; } close (s); puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [akpm@linux-foundation.org: make it compile] [akpm@linux-foundation.org: add sys_ni stub] Signed-off-by: Ulrich Drepper Acked-by: Davide Libenzi Cc: Michael Kerrisk Cc: Cc: "David S. Miller" Cc: Roland McGrath Cc: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/socket.h | 5 +++ include/asm-parisc/socket.h | 5 +++ include/asm-x86/unistd_64.h | 2 ++ include/linux/net.h | 3 ++ include/linux/syscalls.h | 2 ++ kernel/sys_ni.c | 1 + net/compat.c | 52 ++++++++++++++++++++++++++--- net/socket.c | 81 ++++++++++++++++++++++++++++++++++++++++----- 8 files changed, 139 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index 08c97931992..a1057c2d95e 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -62,4 +62,9 @@ #define SO_MARK 36 +/* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. 
+ */ +#define SOCK_NONBLOCK 0x40000000 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h index 69a7a0d30b0..fba402c95ac 100644 --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h @@ -54,4 +54,9 @@ #define SO_MARK 0x401f +/* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. + */ +#define SOCK_NONBLOCK 0x40000000 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index 9c1a4a3470d..e323994a370 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -639,6 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate) __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) #define __NR_timerfd_gettime 287 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) +#define __NR_paccept 288 +__SYSCALL(__NR_paccept, sys_paccept) #ifndef __NO_STUBS diff --git a/include/linux/net.h b/include/linux/net.h index 8b5383c45b4..3a9b06d4d0f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -47,6 +47,7 @@ struct net; #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ +#define SYS_PACCEPT 18 /* sys_paccept(2) */ typedef enum { SS_FREE = 0, /* not allocated */ @@ -219,6 +220,8 @@ extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); +extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); #define net_random() random32() #define net_srandom(seed) srandom32((__force u32)seed) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4394dadff81..2a2a40af6b2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -409,6 +409,8 @@ asmlinkage long sys_getsockopt(int fd, int level, int 
optname, asmlinkage long sys_bind(int, struct sockaddr __user *, int); asmlinkage long sys_connect(int, struct sockaddr __user *, int); asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); +asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *, + const sigset_t *, size_t, int); asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); asmlinkage long sys_send(int, void __user *, size_t, unsigned); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 0fea0ee12da..2f0b8a2e600 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -31,6 +31,7 @@ cond_syscall(sys_socketpair); cond_syscall(sys_bind); cond_syscall(sys_listen); cond_syscall(sys_accept); +cond_syscall(sys_paccept); cond_syscall(sys_connect); cond_syscall(sys_getsockname); cond_syscall(sys_getpeername); diff --git a/net/compat.c b/net/compat.c index 6e1b03b5193..67fb6a3834a 100644 --- a/net/compat.c +++ b/net/compat.c @@ -722,9 +722,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), +static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), + AL(6)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -737,13 +738,52 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } +asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, int flags) +{ + compat_sigset_t ss32; + sigset_t ksigmask, sigsaved; + int ret; + + if (sigmask) { + 
if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) + return -EFAULT; + sigset_from_compat(&ksigmask, &ss32); + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); + + if (ret == -ERESTARTNOHAND) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(&current->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_RECVMSG) + if (call < SYS_SOCKET || call > SYS_PACCEPT) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -764,7 +804,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2])); + ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -804,6 +844,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_PACCEPT: + ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), + compat_ptr(a[3]), a[4], a[5]); + break; default: ret = -EINVAL; break; diff --git a/net/socket.c b/net/socket.c index 64601f90035..a0ce8ad7225 100644 --- a/net/socket.c +++ b/net/socket.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -1225,6 +1226,9 @@ asmlinkage long sys_socket(int family, int type, int protocol) return -EINVAL; type &= SOCK_TYPE_MASK; + if 
(SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + retval = sock_create(family, type, protocol, &sock); if (retval < 0) goto out; @@ -1259,6 +1263,9 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, return -EINVAL; type &= SOCK_TYPE_MASK; + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + /* * Obtain the first socket and check if the underlying protocol * supports the socketpair call. @@ -1413,14 +1420,20 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) +long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; int err, len, newfd, fput_needed; struct sockaddr_storage address; + if (flags & ~SOCK_CLOEXEC) + return -EINVAL; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1438,7 +1451,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_fd(&newfile, 0); + newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); @@ -1491,6 +1504,50 @@ out_fd: goto out_put; } +asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const sigset_t __user *sigmask, + size_t sigsetsize, int flags) +{ + sigset_t ksigmask, sigsaved; + int ret; + + if (sigmask) { + /* XXX: Don't preclude handling different sized sigset_t's. 
*/ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) + return -EFAULT; + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); + + if (ret < 0 && signal_pending(current)) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(&current->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} + +asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen) +{ + return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); +} + /* * Attempt to connect to a socket with the server address. The address * is in user space so we verify it is OK and move it to kernel space. @@ -2011,10 +2068,11 @@ out: /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[18]={ +static const unsigned char nargs[19]={ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), + AL(6) }; #undef AL @@ -2033,7 +2091,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) unsigned long a0, a1; int err; - if (call < 1 || call > SYS_RECVMSG) + if (call < 1 || call > SYS_PACCEPT) return -EINVAL; /* copy_from_user should be SMP safe. 
*/ @@ -2062,8 +2120,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) break; case SYS_ACCEPT: err = - sys_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); + do_accept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2110,6 +2168,13 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_PACCEPT: + err = + sys_paccept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], + (const sigset_t __user *) a[3], + a[4], a[5]); + break; default: err = -EINVAL; break; -- cgit v1.2.3-70-g09d2 From c019bbc612f6633ede7ed67725cbf68de45ae8a4 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 23 Jul 2008 21:29:21 -0700 Subject: flag parameters: paccept w/out set_restore_sigmask Some platforms do not have support to restore the signal mask in the return path from a syscall. For those platforms syscalls like pselect are not defined at all. This is, I think, not a good choice for paccept() since paccept() adds more value on top of accept() than just the signal mask handling. Therefore this patch defines a scaled down version of the sys_paccept function for those platforms. It returns -EINVAL in case the signal mask is non-NULL but behaves the same otherwise. Note that I explicitly included <linux/thread_info.h>. I saw that it is currently included but indirectly two levels down. There is too much risk in relying on this. The header might change and then suddenly the function definition would change without anyone immediately noticing. 
Signed-off-by: Ulrich Drepper Cc: Davide Libenzi Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/net.h | 3 +++ net/socket.c | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'net') diff --git a/include/linux/net.h b/include/linux/net.h index 3a9b06d4d0f..39a23af059b 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -102,6 +102,9 @@ enum sock_type { /* Flags for socket, socketpair, paccept */ #define SOCK_CLOEXEC O_CLOEXEC +#ifndef SOCK_NONBLOCK +#define SOCK_NONBLOCK O_NONBLOCK +#endif #endif /* ARCH_HAS_SOCKET_TYPES */ diff --git a/net/socket.c b/net/socket.c index a0ce8ad7225..d163adff95b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -1504,6 +1505,7 @@ out_fd: goto out_put; } +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, const sigset_t __user *sigmask, @@ -1541,6 +1543,21 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, return ret; } +#else +asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const sigset_t __user *sigmask, + size_t sigsetsize, int flags) +{ + /* The platform does not support restoring the signal mask in the + * return path. So we do not allow using paccept() with a signal + * mask. */ + if (sigmask) + return -EINVAL; + + return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); +} +#endif asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) -- cgit v1.2.3-70-g09d2 From 77d2720059618b9b6e827a8b73831eb6c6fad63c Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 23 Jul 2008 21:29:35 -0700 Subject: flag parameters: NONBLOCK in socket and socketpair This patch introduces support for the SOCK_NONBLOCK flag in socket, socketpair, and paccept. 
To do this the internal function sock_attach_fd gets an additional parameter which it uses to set the appropriate flag for the file descriptor. Given that in modern, scalable programs almost all socket connections are non-blocking and the minimal additional cost for the new functionality I see no reason not to add this code. The following test must be adjusted for architectures other than x86 and x86-64 and in case the syscall numbers changed. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include #include #include #include #include #include #include #ifndef __NR_paccept # ifdef __x86_64__ # define __NR_paccept 288 # elif defined __i386__ # define SYS_PACCEPT 18 # define USE_SOCKETCALL 1 # else # error "need __NR_paccept" # endif #endif #ifdef USE_SOCKETCALL # define paccept(fd, addr, addrlen, mask, flags) \ ({ long args[6] = { \ (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \ syscall (__NR_socketcall, SYS_PACCEPT, args); }) #else # define paccept(fd, addr, addrlen, mask, flags) \ syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags) #endif #define PORT 57392 #define SOCK_NONBLOCK O_NONBLOCK static pthread_barrier_t b; static void * tf (void *arg) { pthread_barrier_wait (&b); int s = socket (AF_INET, SOCK_STREAM, 0); struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); pthread_barrier_wait (&b); s = socket (AF_INET, SOCK_STREAM, 0); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); return NULL; } int main (void) { int fd; fd = socket (PF_INET, SOCK_STREAM, 0); if (fd == -1) { puts ("socket(0) failed"); return 1; } int fl = fcntl (fd, F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if (fl & O_NONBLOCK) { puts ("socket(0) set non-blocking mode"); return 1; } 
close (fd); fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0); if (fd == -1) { puts ("socket(SOCK_NONBLOCK) failed"); return 1; } fl = fcntl (fd, F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if ((fl & O_NONBLOCK) == 0) { puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode"); return 1; } close (fd); int fds[2]; if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1) { puts ("socketpair(0) failed"); return 1; } for (int i = 0; i < 2; ++i) { fl = fcntl (fds[i], F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if (fl & O_NONBLOCK) { printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i); return 1; } close (fds[i]); } if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1) { puts ("socketpair(SOCK_NONBLOCK) failed"); return 1; } for (int i = 0; i < 2; ++i) { fl = fcntl (fds[i], F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if ((fl & O_NONBLOCK) == 0) { printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i); return 1; } close (fds[i]); } pthread_barrier_init (&b, NULL, 2); struct sockaddr_in sin; pthread_t th; if (pthread_create (&th, NULL, tf, NULL) != 0) { puts ("pthread_create failed"); return 1; } int s = socket (AF_INET, SOCK_STREAM, 0); int reuse = 1; setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); bind (s, (struct sockaddr *) &sin, sizeof (sin)); listen (s, SOMAXCONN); pthread_barrier_wait (&b); int s2 = paccept (s, NULL, 0, NULL, 0); if (s2 < 0) { puts ("paccept(0) failed"); return 1; } fl = fcntl (s2, F_GETFL); if (fl & O_NONBLOCK) { puts ("paccept(0) set non-blocking mode"); return 1; } close (s2); close (s); pthread_barrier_wait (&b); s = socket (AF_INET, SOCK_STREAM, 0); sin.sin_port = htons (PORT); setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse)); bind (s, (struct sockaddr *) &sin, sizeof (sin)); listen (s, SOMAXCONN); 
pthread_barrier_wait (&b); s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK); if (s2 < 0) { puts ("paccept(SOCK_NONBLOCK) failed"); return 1; } fl = fcntl (s2, F_GETFL); if ((fl & O_NONBLOCK) == 0) { puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode"); return 1; } close (s2); close (s); pthread_barrier_wait (&b); puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Ulrich Drepper Acked-by: Davide Libenzi Cc: Michael Kerrisk Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/net.h | 2 +- net/socket.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/net.h b/include/linux/net.h index 39a23af059b..2f999fbb188 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -20,7 +20,7 @@ #include #include -#include /* For O_CLOEXEC */ +#include /* For O_CLOEXEC and O_NONBLOCK */ #include struct poll_table_struct; diff --git a/net/socket.c b/net/socket.c index d163adff95b..31105f9048a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -369,7 +369,7 @@ static int sock_alloc_fd(struct file **filep, int flags) return fd; } -static int sock_attach_fd(struct socket *sock, struct file *file) +static int sock_attach_fd(struct socket *sock, struct file *file, int flags) { struct dentry *dentry; struct qstr name = { .name = "" }; @@ -391,7 +391,7 @@ static int sock_attach_fd(struct socket *sock, struct file *file) init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, &socket_file_ops); SOCK_INODE(sock)->i_fop = &socket_file_ops; - file->f_flags = O_RDWR; + file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_pos = 0; file->private_data = sock; @@ -404,7 +404,7 @@ int sock_map_fd(struct socket *sock, int flags) int fd = sock_alloc_fd(&newfile, flags); if (likely(fd >= 0)) { - int err = sock_attach_fd(sock, newfile); + int err = sock_attach_fd(sock, newfile, flags); if (unlikely(err < 0)) { 
put_filp(newfile); @@ -1223,7 +1223,7 @@ asmlinkage long sys_socket(int family, int type, int protocol) int flags; flags = type & ~SOCK_TYPE_MASK; - if (flags & ~SOCK_CLOEXEC) + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; type &= SOCK_TYPE_MASK; @@ -1234,7 +1234,7 @@ asmlinkage long sys_socket(int family, int type, int protocol) if (retval < 0) goto out; - retval = sock_map_fd(sock, flags & O_CLOEXEC); + retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); if (retval < 0) goto out_release; @@ -1260,7 +1260,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int flags; flags = type & ~SOCK_TYPE_MASK; - if (flags & ~SOCK_CLOEXEC) + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; type &= SOCK_TYPE_MASK; @@ -1298,12 +1298,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, goto out_release_both; } - err = sock_attach_fd(sock1, newfile1); + err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); if (unlikely(err < 0)) { goto out_fd2; } - err = sock_attach_fd(sock2, newfile2); + err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); if (unlikely(err < 0)) { fput(newfile1); goto out_fd1; @@ -1429,7 +1429,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, int err, len, newfd, fput_needed; struct sockaddr_storage address; - if (flags & ~SOCK_CLOEXEC) + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) @@ -1459,7 +1459,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, goto out_put; } - err = sock_attach_fd(newsock, newfile); + err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); if (err < 0) goto out_fd_simple; -- cgit v1.2.3-70-g09d2 From e38b36f325153eaadd1c2a7abc5762079233e540 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 23 Jul 2008 21:29:42 -0700 Subject: flag parameters: check magic constants This patch adds test that ensure the boundary conditions for the 
various constants introduced in the previous patches is met. No code is generated. [akpm@linux-foundation.org: fix alpha] Signed-off-by: Ulrich Drepper Acked-by: Davide Libenzi Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventfd.c | 4 ++++ fs/eventpoll.c | 3 +++ fs/inotify_user.c | 4 ++++ fs/signalfd.c | 4 ++++ fs/timerfd.c | 4 ++++ net/socket.c | 6 ++++++ 6 files changed, 25 insertions(+) (limited to 'net') diff --git a/fs/eventfd.c b/fs/eventfd.c index 3ed4466177a..08bf558d040 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -203,6 +203,10 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags) int fd; struct eventfd_ctx *ctx; + /* Check the EFD_* constants for consistency. */ + BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); + if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) return -EINVAL; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3fd4014f3c5..2fdad420404 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1051,6 +1051,9 @@ asmlinkage long sys_epoll_create2(int size, int flags) int error, fd = -1; struct eventpoll *ep; + /* Check the EPOLL_* constant for consistency. */ + BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); + if (flags & ~EPOLL_CLOEXEC) return -EINVAL; diff --git a/fs/inotify_user.c b/fs/inotify_user.c index dc7e1f61974..fe79c25d95d 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -574,6 +574,10 @@ asmlinkage long sys_inotify_init1(int flags) struct file *filp; int fd, ret; + /* Check the IN_* constants for consistency. */ + BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK); + if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) return -EINVAL; diff --git a/fs/signalfd.c b/fs/signalfd.c index 5441a4bca77..9c39bc7f843 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -211,6 +211,10 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, sigset_t sigmask; struct signalfd_ctx *ctx; + /* Check the SFD_* constants for consistency. 
*/ + BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK); + if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK)) return -EINVAL; diff --git a/fs/timerfd.c b/fs/timerfd.c index 75d44efe346..c502c60e4f5 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -184,6 +184,10 @@ asmlinkage long sys_timerfd_create(int clockid, int flags) int ufd; struct timerfd_ctx *ctx; + /* Check the TFD_* constants for consistency. */ + BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); + if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) return -EINVAL; if (clockid != CLOCK_MONOTONIC && diff --git a/net/socket.c b/net/socket.c index 31105f9048a..1310a82cbba 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1222,6 +1222,12 @@ asmlinkage long sys_socket(int family, int type, int protocol) struct socket *sock; int flags; + /* Check the SOCK_* constants for consistency. */ + BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); + flags = type & ~SOCK_TYPE_MASK; if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; -- cgit v1.2.3-70-g09d2 From cffe1c5d7a5a1e54f7c2c6d0510f651a965bccc3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Jul 2008 01:25:04 -0700 Subject: pkt_sched: Fix locking in shutdown_scheduler_queue() Qdisc locks need to be held with BH disabled. Tested-by: Ingo Molnar Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4ac7e3a8c25..43abd4d27ea 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -736,9 +736,9 @@ static void shutdown_scheduler_queue(struct net_device *dev, dev_queue->qdisc = qdisc_default; dev_queue->qdisc_sleeping = qdisc_default; - spin_lock(root_lock); + spin_lock_bh(root_lock); qdisc_destroy(qdisc); - spin_unlock(root_lock); + spin_unlock_bh(root_lock); } } -- cgit v1.2.3-70-g09d2 From 6fccab671f2f0a24b799f29a4ec878f62d34656c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Jul 2008 02:54:40 -0700 Subject: ipsec: ipcomp - Merge IPComp implementations This patch merges the IPv4/IPv6 IPComp implementations since most of the code is identical. As a result future enhancements will no longer need to be duplicated. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ipcomp.h | 6 + net/ipv4/Kconfig | 4 +- net/ipv4/ipcomp.c | 315 +------------------------------------------- net/ipv6/Kconfig | 4 +- net/ipv6/ipcomp6.c | 298 +---------------------------------------- net/xfrm/Kconfig | 6 + net/xfrm/Makefile | 1 + net/xfrm/xfrm_ipcomp.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 377 insertions(+), 606 deletions(-) create mode 100644 net/xfrm/xfrm_ipcomp.c (limited to 'net') diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index 330b74e813a..2a1092abaa0 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -14,6 +14,12 @@ struct ipcomp_data { struct ip_comp_hdr; struct sk_buff; +struct xfrm_state; + +int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb); +int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb); +void ipcomp_destroy(struct xfrm_state *x); +int ipcomp_init_state(struct xfrm_state *x); static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb) { diff --git 
a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 4670683b468..591ea23639c 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -356,10 +356,8 @@ config INET_ESP config INET_IPCOMP tristate "IP: IPComp transformation" - select XFRM select INET_XFRM_TUNNEL - select CRYPTO - select CRYPTO_DEFLATE + select XFRM_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a75807b971b..a42b64d040c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -14,153 +14,14 @@ * - Adaptive compression. */ #include -#include #include -#include -#include -#include -#include -#include #include -#include #include #include #include #include #include - -struct ipcomp_tfms { - struct list_head list; - struct crypto_comp **tfms; - int users; -}; - -static DEFINE_MUTEX(ipcomp_resource_mutex); -static void **ipcomp_scratches; -static int ipcomp_scratch_users; -static LIST_HEAD(ipcomp_tfms_list); - -static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - const u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - - if (err) - goto out; - - if (dlen < (plen + sizeof(struct ip_comp_hdr))) { - err = -EINVAL; - goto out; - } - - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) - goto out; - - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); -out: - put_cpu(); - return err; -} - -static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int nexthdr; - int err = -ENOMEM; - struct ip_comp_hdr *ipch; - - if (skb_linearize_cow(skb)) - goto out; - - skb->ip_summed = CHECKSUM_NONE; - - /* Remove ipcomp 
header and decompress original payload */ - ipch = (void *)skb->data; - nexthdr = ipch->nexthdr; - - skb->transport_header = skb->network_header + sizeof(*ipch); - __skb_pull(skb, sizeof(*ipch)); - err = ipcomp_decompress(x, skb); - if (err) - goto out; - - err = nexthdr; - -out: - return err; -} - -static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err; - - local_bh_disable(); - err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); - if (err) - goto out; - - if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { - err = -EMSGSIZE; - goto out; - } - - memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); - - pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); - return 0; - -out: - put_cpu(); - return err; -} - -static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - struct ip_comp_hdr *ipch; - struct ipcomp_data *ipcd = x->data; - - if (skb->len < ipcd->threshold) { - /* Don't bother compressing */ - goto out_ok; - } - - if (skb_linearize_cow(skb)) - goto out_ok; - - err = ipcomp_compress(x, skb); - - if (err) { - goto out_ok; - } - - /* Install ipcomp header, convert into ipcomp datagram. 
*/ - ipch = ip_comp_hdr(skb); - ipch->nexthdr = *skb_mac_header(skb); - ipch->flags = 0; - ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_mac_header(skb) = IPPROTO_COMP; -out_ok: - skb_push(skb, -skb_network_offset(skb)); - return 0; -} +#include static void ipcomp4_err(struct sk_buff *skb, u32 info) { @@ -241,156 +102,12 @@ out: return err; } -static void ipcomp_free_scratches(void) -{ - int i; - void **scratches; - - if (--ipcomp_scratch_users) - return; - - scratches = ipcomp_scratches; - if (!scratches) - return; - - for_each_possible_cpu(i) - vfree(*per_cpu_ptr(scratches, i)); - - free_percpu(scratches); -} - -static void **ipcomp_alloc_scratches(void) -{ - int i; - void **scratches; - - if (ipcomp_scratch_users++) - return ipcomp_scratches; - - scratches = alloc_percpu(void *); - if (!scratches) - return NULL; - - ipcomp_scratches = scratches; - - for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); - if (!scratch) - return NULL; - *per_cpu_ptr(scratches, i) = scratch; - } - - return scratches; -} - -static void ipcomp_free_tfms(struct crypto_comp **tfms) -{ - struct ipcomp_tfms *pos; - int cpu; - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - if (pos->tfms == tfms) - break; - } - - BUG_TRAP(pos); - - if (--pos->users) - return; - - list_del(&pos->list); - kfree(pos); - - if (!tfms) - return; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_comp(tfm); - } - free_percpu(tfms); -} - -static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) -{ - struct ipcomp_tfms *pos; - struct crypto_comp **tfms; - int cpu; - - /* This can be any valid CPU ID so we don't need locking. 
*/ - cpu = raw_smp_processor_id(); - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_comp *tfm; - - tfms = pos->tfms; - tfm = *per_cpu_ptr(tfms, cpu); - - if (!strcmp(crypto_comp_name(tfm), alg_name)) { - pos->users++; - return tfms; - } - } - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (!pos) - return NULL; - - pos->users = 1; - INIT_LIST_HEAD(&pos->list); - list_add(&pos->list, &ipcomp_tfms_list); - - pos->tfms = tfms = alloc_percpu(struct crypto_comp *); - if (!tfms) - goto error; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - *per_cpu_ptr(tfms, cpu) = tfm; - } - - return tfms; - -error: - ipcomp_free_tfms(tfms); - return NULL; -} - -static void ipcomp_free_data(struct ipcomp_data *ipcd) -{ - if (ipcd->tfms) - ipcomp_free_tfms(ipcd->tfms); - ipcomp_free_scratches(); -} - -static void ipcomp_destroy(struct xfrm_state *x) -{ - struct ipcomp_data *ipcd = x->data; - if (!ipcd) - return; - xfrm_state_delete_tunnel(x); - mutex_lock(&ipcomp_resource_mutex); - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); -} - -static int ipcomp_init_state(struct xfrm_state *x) +static int ipcomp4_init_state(struct xfrm_state *x) { int err; struct ipcomp_data *ipcd; struct xfrm_algo_desc *calg_desc; - err = -EINVAL; - if (!x->calg) - goto out; - - if (x->encap) - goto out; - x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: @@ -402,40 +119,22 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; } - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) + err = ipcomp_init_state(x); + if (err) goto out; - mutex_lock(&ipcomp_resource_mutex); - if (!ipcomp_alloc_scratches()) - goto error; - - ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); - if (!ipcd->tfms) - goto error; - mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { err = 
ipcomp_tunnel_attach(x); if (err) goto error_tunnel; } - calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); - BUG_ON(!calg_desc); - ipcd->threshold = calg_desc->uinfo.comp.threshold; - x->data = ipcd; err = 0; out: return err; error_tunnel: - mutex_lock(&ipcomp_resource_mutex); -error: - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); + ipcomp_destroy(x); goto out; } @@ -443,7 +142,7 @@ static const struct xfrm_type ipcomp_type = { .description = "IPCOMP4", .owner = THIS_MODULE, .proto = IPPROTO_COMP, - .init_state = ipcomp_init_state, + .init_state = ipcomp4_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output @@ -481,7 +180,7 @@ module_init(ipcomp4_init); module_exit(ipcomp4_fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp/IPv4) - RFC3173"); MODULE_AUTHOR("James Morris "); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 42814a2ec9d..ec992159b5f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -96,10 +96,8 @@ config INET6_ESP config INET6_IPCOMP tristate "IPv6: IPComp transformation" - select XFRM select INET6_XFRM_TUNNEL - select CRYPTO - select CRYPTO_DEFLATE + select XFRM_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index ee6de425ce6..0cfcea42153 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -50,125 +50,6 @@ #include #include -struct ipcomp6_tfms { - struct list_head list; - struct crypto_comp **tfms; - int users; -}; - -static DEFINE_MUTEX(ipcomp6_resource_mutex); -static void **ipcomp6_scratches; -static int ipcomp6_scratch_users; -static LIST_HEAD(ipcomp6_tfms_list); - -static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int nexthdr; - int err = -ENOMEM; - struct ip_comp_hdr *ipch; - int plen, dlen; - struct ipcomp_data *ipcd = x->data; - u8 *start, *scratch; - struct crypto_comp *tfm; - int cpu; - - if (skb_linearize_cow(skb)) - goto out; - - skb->ip_summed = CHECKSUM_NONE; - - /* Remove ipcomp header and decompress original payload */ - ipch = (void *)skb->data; - nexthdr = ipch->nexthdr; - - skb->transport_header = skb->network_header + sizeof(*ipch); - __skb_pull(skb, sizeof(*ipch)); - - /* decompression */ - plen = skb->len; - dlen = IPCOMP_SCRATCH_SIZE; - start = skb->data; - - cpu = get_cpu(); - scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); - tfm = *per_cpu_ptr(ipcd->tfms, cpu); - - err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - if (err) - goto out_put_cpu; - - if (dlen < (plen + sizeof(*ipch))) { - err = -EINVAL; - goto out_put_cpu; - } - - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) { - goto out_put_cpu; - } - - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); - err = nexthdr; - -out_put_cpu: - put_cpu(); -out: - return err; -} - -static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - struct ip_comp_hdr *ipch; - struct ipcomp_data *ipcd = x->data; - int plen, dlen; - u8 *start, *scratch; - struct crypto_comp *tfm; - int cpu; - - /* check whether datagram len is larger than threshold */ - if (skb->len < ipcd->threshold) { - goto out_ok; - } - - if 
(skb_linearize_cow(skb)) - goto out_ok; - - /* compression */ - plen = skb->len; - dlen = IPCOMP_SCRATCH_SIZE; - start = skb->data; - - cpu = get_cpu(); - scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); - tfm = *per_cpu_ptr(ipcd->tfms, cpu); - - local_bh_disable(); - err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); - if (err || (dlen + sizeof(*ipch)) >= plen) { - put_cpu(); - goto out_ok; - } - memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); - pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); - - /* insert ipcomp header and replace datagram */ - ipch = ip_comp_hdr(skb); - ipch->nexthdr = *skb_mac_header(skb); - ipch->flags = 0; - ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_mac_header(skb) = IPPROTO_COMP; - -out_ok: - skb_push(skb, -skb_network_offset(skb)); - - return 0; -} - static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { @@ -251,161 +132,12 @@ out: return err; } -static void ipcomp6_free_scratches(void) -{ - int i; - void **scratches; - - if (--ipcomp6_scratch_users) - return; - - scratches = ipcomp6_scratches; - if (!scratches) - return; - - for_each_possible_cpu(i) { - void *scratch = *per_cpu_ptr(scratches, i); - - vfree(scratch); - } - - free_percpu(scratches); -} - -static void **ipcomp6_alloc_scratches(void) -{ - int i; - void **scratches; - - if (ipcomp6_scratch_users++) - return ipcomp6_scratches; - - scratches = alloc_percpu(void *); - if (!scratches) - return NULL; - - ipcomp6_scratches = scratches; - - for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); - if (!scratch) - return NULL; - *per_cpu_ptr(scratches, i) = scratch; - } - - return scratches; -} - -static void ipcomp6_free_tfms(struct crypto_comp **tfms) -{ - struct ipcomp6_tfms *pos; - int cpu; - - list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - if (pos->tfms == tfms) - break; - } - - BUG_TRAP(pos); - - if (--pos->users) - return; 
- - list_del(&pos->list); - kfree(pos); - - if (!tfms) - return; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_comp(tfm); - } - free_percpu(tfms); -} - -static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) -{ - struct ipcomp6_tfms *pos; - struct crypto_comp **tfms; - int cpu; - - /* This can be any valid CPU ID so we don't need locking. */ - cpu = raw_smp_processor_id(); - - list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - struct crypto_comp *tfm; - - tfms = pos->tfms; - tfm = *per_cpu_ptr(tfms, cpu); - - if (!strcmp(crypto_comp_name(tfm), alg_name)) { - pos->users++; - return tfms; - } - } - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (!pos) - return NULL; - - pos->users = 1; - INIT_LIST_HEAD(&pos->list); - list_add(&pos->list, &ipcomp6_tfms_list); - - pos->tfms = tfms = alloc_percpu(struct crypto_comp *); - if (!tfms) - goto error; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - *per_cpu_ptr(tfms, cpu) = tfm; - } - - return tfms; - -error: - ipcomp6_free_tfms(tfms); - return NULL; -} - -static void ipcomp6_free_data(struct ipcomp_data *ipcd) -{ - if (ipcd->tfms) - ipcomp6_free_tfms(ipcd->tfms); - ipcomp6_free_scratches(); -} - -static void ipcomp6_destroy(struct xfrm_state *x) -{ - struct ipcomp_data *ipcd = x->data; - if (!ipcd) - return; - xfrm_state_delete_tunnel(x); - mutex_lock(&ipcomp6_resource_mutex); - ipcomp6_free_data(ipcd); - mutex_unlock(&ipcomp6_resource_mutex); - kfree(ipcd); - - xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); -} - static int ipcomp6_init_state(struct xfrm_state *x) { int err; struct ipcomp_data *ipcd; struct xfrm_algo_desc *calg_desc; - err = -EINVAL; - if (!x->calg) - goto out; - - if (x->encap) - goto out; - x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: @@ -417,39 +149,21 @@ static int ipcomp6_init_state(struct 
xfrm_state *x) goto out; } - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) + err = ipcomp_init_state(x); + if (err) goto out; - mutex_lock(&ipcomp6_resource_mutex); - if (!ipcomp6_alloc_scratches()) - goto error; - - ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name); - if (!ipcd->tfms) - goto error; - mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; } - calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); - BUG_ON(!calg_desc); - ipcd->threshold = calg_desc->uinfo.comp.threshold; - x->data = ipcd; err = 0; out: return err; error_tunnel: - mutex_lock(&ipcomp6_resource_mutex); -error: - ipcomp6_free_data(ipcd); - mutex_unlock(&ipcomp6_resource_mutex); - kfree(ipcd); + ipcomp_destroy(x); goto out; } @@ -460,9 +174,9 @@ static const struct xfrm_type ipcomp6_type = .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, - .destructor = ipcomp6_destroy, - .input = ipcomp6_input, - .output = ipcomp6_output, + .destructor = ipcomp_destroy, + .input = ipcomp_input, + .output = ipcomp_output, .hdr_offset = xfrm6_find_1stfragopt, }; diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 9201ef8ad90..6d081674515 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -46,6 +46,12 @@ config XFRM_STATISTICS If unsure, say N. 
+config XFRM_IPCOMP + tristate + select XFRM + select CRYPTO + select CRYPTO_DEFLATE + config NET_KEY tristate "PF_KEY sockets" select XFRM diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 332cfb0ff56..0f439a72cca 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o xfrm_algo.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o +obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c new file mode 100644 index 00000000000..b51e804fbba --- /dev/null +++ b/net/xfrm/xfrm_ipcomp.c @@ -0,0 +1,349 @@ +/* + * IP Payload Compression Protocol (IPComp) - RFC3173. + * + * Copyright (c) 2003 James Morris + * Copyright (c) 2003-2008 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Todo: + * - Tunable compression parameters. + * - Compression stats. + * - Adaptive compression. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ipcomp_tfms { + struct list_head list; + struct crypto_comp **tfms; + int users; +}; + +static DEFINE_MUTEX(ipcomp_resource_mutex); +static void **ipcomp_scratches; +static int ipcomp_scratch_users; +static LIST_HEAD(ipcomp_tfms_list); + +static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + const u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); + + if (err) + goto out; + + if (dlen < (plen + sizeof(struct ip_comp_hdr))) { + err = -EINVAL; + goto out; + } + + err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); + if (err) + goto out; + + skb->truesize += dlen - plen; + __skb_put(skb, dlen - plen); + skb_copy_to_linear_data(skb, scratch, dlen); +out: + put_cpu(); + return err; +} + +int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int nexthdr; + int err = -ENOMEM; + struct ip_comp_hdr *ipch; + + if (skb_linearize_cow(skb)) + goto out; + + skb->ip_summed = CHECKSUM_NONE; + + /* Remove ipcomp header and decompress original payload */ + ipch = (void *)skb->data; + nexthdr = ipch->nexthdr; + + skb->transport_header = skb->network_header + sizeof(*ipch); + __skb_pull(skb, sizeof(*ipch)); + err = ipcomp_decompress(x, skb); + if (err) + goto out; + + err = nexthdr; + +out: + return err; +} +EXPORT_SYMBOL_GPL(ipcomp_input); + +static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct 
crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err; + + local_bh_disable(); + err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); + local_bh_enable(); + if (err) + goto out; + + if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { + err = -EMSGSIZE; + goto out; + } + + memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); + put_cpu(); + + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); + return 0; + +out: + put_cpu(); + return err; +} + +int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + struct ip_comp_hdr *ipch; + struct ipcomp_data *ipcd = x->data; + + if (skb->len < ipcd->threshold) { + /* Don't bother compressing */ + goto out_ok; + } + + if (skb_linearize_cow(skb)) + goto out_ok; + + err = ipcomp_compress(x, skb); + + if (err) { + goto out_ok; + } + + /* Install ipcomp header, convert into ipcomp datagram. */ + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); + ipch->flags = 0; + ipch->cpi = htons((u16 )ntohl(x->id.spi)); + *skb_mac_header(skb) = IPPROTO_COMP; +out_ok: + skb_push(skb, -skb_network_offset(skb)); + return 0; +} +EXPORT_SYMBOL_GPL(ipcomp_output); + +static void ipcomp_free_scratches(void) +{ + int i; + void **scratches; + + if (--ipcomp_scratch_users) + return; + + scratches = ipcomp_scratches; + if (!scratches) + return; + + for_each_possible_cpu(i) + vfree(*per_cpu_ptr(scratches, i)); + + free_percpu(scratches); +} + +static void **ipcomp_alloc_scratches(void) +{ + int i; + void **scratches; + + if (ipcomp_scratch_users++) + return ipcomp_scratches; + + scratches = alloc_percpu(void *); + if (!scratches) + return NULL; + + ipcomp_scratches = scratches; + + for_each_possible_cpu(i) { + void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); + if (!scratch) + return NULL; + *per_cpu_ptr(scratches, i) = scratch; + } + + return scratches; +} + +static void ipcomp_free_tfms(struct crypto_comp **tfms) +{ + struct ipcomp_tfms *pos; + int cpu; + + list_for_each_entry(pos, &ipcomp_tfms_list, list) 
{ + if (pos->tfms == tfms) + break; + } + + BUG_TRAP(pos); + + if (--pos->users) + return; + + list_del(&pos->list); + kfree(pos); + + if (!tfms) + return; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); + } + free_percpu(tfms); +} + +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) +{ + struct ipcomp_tfms *pos; + struct crypto_comp **tfms; + int cpu; + + /* This can be any valid CPU ID so we don't need locking. */ + cpu = raw_smp_processor_id(); + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + struct crypto_comp *tfm; + + tfms = pos->tfms; + tfm = *per_cpu_ptr(tfms, cpu); + + if (!strcmp(crypto_comp_name(tfm), alg_name)) { + pos->users++; + return tfms; + } + } + + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (!pos) + return NULL; + + pos->users = 1; + INIT_LIST_HEAD(&pos->list); + list_add(&pos->list, &ipcomp_tfms_list); + + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); + if (!tfms) + goto error; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + *per_cpu_ptr(tfms, cpu) = tfm; + } + + return tfms; + +error: + ipcomp_free_tfms(tfms); + return NULL; +} + +static void ipcomp_free_data(struct ipcomp_data *ipcd) +{ + if (ipcd->tfms) + ipcomp_free_tfms(ipcd->tfms); + ipcomp_free_scratches(); +} + +void ipcomp_destroy(struct xfrm_state *x) +{ + struct ipcomp_data *ipcd = x->data; + if (!ipcd) + return; + xfrm_state_delete_tunnel(x); + mutex_lock(&ipcomp_resource_mutex); + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); +} +EXPORT_SYMBOL_GPL(ipcomp_destroy); + +int ipcomp_init_state(struct xfrm_state *x) +{ + int err; + struct ipcomp_data *ipcd; + struct xfrm_algo_desc *calg_desc; + + err = -EINVAL; + if (!x->calg) + goto out; + + if (x->encap) + goto out; + + err = -ENOMEM; + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); + if (!ipcd) + goto out; + 
+ mutex_lock(&ipcomp_resource_mutex); + if (!ipcomp_alloc_scratches()) + goto error; + + ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); + if (!ipcd->tfms) + goto error; + mutex_unlock(&ipcomp_resource_mutex); + + calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); + BUG_ON(!calg_desc); + ipcd->threshold = calg_desc->uinfo.comp.threshold; + x->data = ipcd; + err = 0; +out: + return err; + +error: + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); + goto out; +} +EXPORT_SYMBOL_GPL(ipcomp_init_state); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_AUTHOR("James Morris "); -- cgit v1.2.3-70-g09d2 From 7d7e5a60c62e88cb8782760bb6c4d3bd1577a6c6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Jul 2008 02:55:33 -0700 Subject: ipsec: ipcomp - Decompress into frags if necessary When decompressing extremely large packets allocating them through kmalloc is prone to failure. Therefore it's better to use page frags instead. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_ipcomp.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index b51e804fbba..800f669083f 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -49,6 +50,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); + int len; if (err) goto out; @@ -58,13 +60,47 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) goto out; } - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) - goto out; + len = dlen - plen; + if (len > skb_tailroom(skb)) + len = skb_tailroom(skb); + + skb->truesize += len; + __skb_put(skb, len); + + len += plen; + skb_copy_to_linear_data(skb, scratch, len); + + while ((scratch += len, dlen -= len) > 0) { + skb_frag_t *frag; + + err = -EMSGSIZE; + if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) + goto out; + + frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; + frag->page = alloc_page(GFP_ATOMIC); + + err = -ENOMEM; + if (!frag->page) + goto out; + + len = PAGE_SIZE; + if (dlen < len) + len = dlen; + + memcpy(page_address(frag->page), scratch, len); + + frag->page_offset = 0; + frag->size = len; + skb->truesize += len; + skb->data_len += len; + skb->len += len; + + skb_shinfo(skb)->nr_frags++; + } + + err = 0; - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); out: put_cpu(); return err; -- cgit v1.2.3-70-g09d2 From 696adfe84c11c571a1e0863460ff0ec142b4e5a9 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 25 Jul 2008 01:45:34 -0700 Subject: list_for_each_rcu must die: networking All uses of list_for_each_rcu() can be profitably replaced by the easier-to-use list_for_each_entry_rcu(). This patch makes this change for networking, in preparation for removing the list_for_each_rcu() API entirely. Acked-by: David S. Miller Signed-off-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/802/psnap.c | 4 +--- net/ipv4/af_inet.c | 9 +++------ net/ipv6/af_inet6.c | 9 +++------ 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/802/psnap.c b/net/802/psnap.c index ea464393144..b3cfe5a14fc 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -31,11 +31,9 @@ static struct llc_sap *snap_sap; */ static struct datalink_proto *find_snap_client(unsigned char *desc) { - struct list_head *entry; struct datalink_proto *proto = NULL, *p; - list_for_each_rcu(entry, &snap_list) { - p = list_entry(entry, struct datalink_proto, node); + list_for_each_entry_rcu(p, &snap_list, node) { if (!memcmp(p->type, desc, 5)) { proto = p; break; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dd919d84285..f440a9f5492 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -264,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol) static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; @@ -281,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { + err = 0; /* Check the non-wild match. 
*/ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -302,10 +300,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (unlikely(answer == NULL)) { + if (unlikely(err)) { if (try_loading_module < 2) { rcu_read_unlock(); /* diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3d828bc4b1c..60461ad7fa6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -83,7 +83,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; @@ -97,13 +96,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) build_ehash_secret(); /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw6[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -118,10 +116,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (!answer) { + if (err) { if (try_loading_module < 2) { rcu_read_unlock(); /* -- cgit v1.2.3-70-g09d2 From 717115e1a5856b57af0f71e1df7149108294fc10 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 25 Jul 2008 01:45:58 -0700 Subject: printk ratelimiting rewrite All ratelimit users use the same jiffies and burst params, so some messages (callbacks) will be lost. For example: if a calls printk_ratelimit(5 * HZ, 1) and b calls printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will be suppressed. - rewrite __ratelimit, and use a ratelimit_state as parameter. Thanks for hints from Andrew.
- Add WARN_ON_RATELIMIT, update rcupreempt.h - remove __printk_ratelimit - use __ratelimit in net_ratelimit Signed-off-by: Dave Young Cc: "David S. Miller" Cc: "Paul E. McKenney" Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bug.h | 3 +++ include/linux/kernel.h | 8 ++----- include/linux/net.h | 3 +-- include/linux/ratelimit.h | 27 +++++++++++++++++++++++ include/linux/rcupreempt.h | 9 ++++++-- kernel/printk.c | 17 +++----------- kernel/sysctl.c | 4 ++-- lib/ratelimit.c | 55 +++++++++++++++++++++++++--------------------- net/core/sysctl_net_core.c | 4 ++-- net/core/utils.c | 5 ++--- 10 files changed, 79 insertions(+), 56 deletions(-) create mode 100644 include/linux/ratelimit.h (limited to 'net') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index a346e744e77..a3f738cffdb 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -97,6 +97,9 @@ extern void warn_slowpath(const char *file, const int line, unlikely(__ret_warn_once); \ }) +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON((condition) && __ratelimit(state)) + #ifdef CONFIG_SMP # define WARN_ON_SMP(x) WARN_ON(x) #else diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5c4b1251e11..fdbbf72ca2e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -189,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) 
__attribute__ ((format (printf, 1, 2))) __cold; -extern int printk_ratelimit_jiffies; -extern int printk_ratelimit_burst; +extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); -extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst); -extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); #else @@ -204,8 +202,6 @@ static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) { return 0; } static inline int printk_ratelimit(void) { return 0; } -static inline int __printk_ratelimit(int ratelimit_jiffies, \ - int ratelimit_burst) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } diff --git a/include/linux/net.h b/include/linux/net.h index 2f999fbb188..4a9a30f2d68 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include -extern int net_msg_cost; -extern int net_msg_burst; +extern struct ratelimit_state net_ratelimit_state; #endif #endif /* __KERNEL__ */ diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h new file mode 100644 index 00000000000..18a5b9ba9d4 --- /dev/null +++ b/include/linux/ratelimit.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_RATELIMIT_H +#define _LINUX_RATELIMIT_H +#include + +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 + +struct ratelimit_state { + int interval; + int burst; + int printed; + int missed; + unsigned long begin; +}; + +#define DEFINE_RATELIMIT_STATE(name, interval, burst) \ + struct ratelimit_state name = {interval, burst,} + +extern int __ratelimit(struct ratelimit_state *rs); + +static inline int ratelimit(void) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, 
+ DEFAULT_RATELIMIT_BURST); + return __ratelimit(&rs); +} +#endif diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index f04b64eca63..0967f03b070 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { + static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); + WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); } static inline void rcu_exit_nohz(void) { + static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); + WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), + &rs); } #else /* CONFIG_NO_HZ */ diff --git a/kernel/printk.c b/kernel/printk.c index 3f7a2a94583..a7f7559c5f6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg) } #if defined CONFIG_PRINTK + +DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); /* * printk rate limiting, lifted from the networking subsystem. * @@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg) * every printk_ratelimit_jiffies to make a denial-of-service * attack impossible. 
*/ -int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) -{ - return __ratelimit(ratelimit_jiffies, ratelimit_burst); -} -EXPORT_SYMBOL(__printk_ratelimit); - -/* minimum time in jiffies between messages */ -int printk_ratelimit_jiffies = 5 * HZ; - -/* number of messages we send before ratelimiting */ -int printk_ratelimit_burst = 10; - int printk_ratelimit(void) { - return __printk_ratelimit(printk_ratelimit_jiffies, - printk_ratelimit_burst); + return __ratelimit(&printk_ratelimit_state); } EXPORT_SYMBOL(printk_ratelimit); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a8299d1fe5..35a50db9b6c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT, .procname = "printk_ratelimit", - .data = &printk_ratelimit_jiffies, + .data = &printk_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT_BURST, .procname = "printk_ratelimit_burst", - .data = &printk_ratelimit_burst, + .data = &printk_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 485e3040dcd..35136671b21 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -3,6 +3,9 @@ * * Isolated from kernel/printk.c by Dave Young * + * 2008-05-01 rewrite the function and use a ratelimit_state data struct as + * parameter. Now every user can use their own standalone ratelimit_state. + * * This file is released under the GPLv2. 
* */ @@ -11,41 +14,43 @@ #include #include +static DEFINE_SPINLOCK(ratelimit_lock); +static unsigned long flags; + /* * __ratelimit - rate limiting - * @ratelimit_jiffies: minimum time in jiffies between two callbacks - * @ratelimit_burst: number of callbacks we do before ratelimiting + * @rs: ratelimit_state data * - * This enforces a rate limit: not more than @ratelimit_burst callbacks - * in every ratelimit_jiffies + * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks + * in every @rs->ratelimit_jiffies */ -int __ratelimit(int ratelimit_jiffies, int ratelimit_burst) +int __ratelimit(struct ratelimit_state *rs) { - static DEFINE_SPINLOCK(ratelimit_lock); - static unsigned toks = 10 * 5 * HZ; - static unsigned long last_msg; - static int missed; - unsigned long flags; - unsigned long now = jiffies; + if (!rs->interval) + return 1; spin_lock_irqsave(&ratelimit_lock, flags); - toks += now - last_msg; - last_msg = now; - if (toks > (ratelimit_burst * ratelimit_jiffies)) - toks = ratelimit_burst * ratelimit_jiffies; - if (toks >= ratelimit_jiffies) { - int lost = missed; + if (!rs->begin) + rs->begin = jiffies; - missed = 0; - toks -= ratelimit_jiffies; - spin_unlock_irqrestore(&ratelimit_lock, flags); - if (lost) - printk(KERN_WARNING "%s: %d messages suppressed\n", - __func__, lost); - return 1; + if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (rs->missed) + printk(KERN_WARNING "%s: %d callbacks suppressed\n", + __func__, rs->missed); + rs->begin = 0; + rs->printed = 0; + rs->missed = 0; } - missed++; + if (rs->burst && rs->burst > rs->printed) + goto print; + + rs->missed++; spin_unlock_irqrestore(&ratelimit_lock, flags); return 0; + +print: + rs->printed++; + spin_unlock_irqrestore(&ratelimit_lock, flags); + return 1; } EXPORT_SYMBOL(__ratelimit); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a570e2af22c..f686467ff12 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -67,7 
+67,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", - .data = &net_msg_cost, + .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", - .data = &net_msg_burst, + .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/net/core/utils.c b/net/core/utils.c index 8031eb59054..72e0ebe964a 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -31,17 +31,16 @@ #include #include -int net_msg_cost __read_mostly = 5*HZ; -int net_msg_burst __read_mostly = 10; int net_msg_warn __read_mostly = 1; EXPORT_SYMBOL(net_msg_warn); +DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { - return __printk_ratelimit(net_msg_cost, net_msg_burst); + return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); -- cgit v1.2.3-70-g09d2 From 4ecb90090c84210a8bd2a9d7a5906e616735873c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 25 Jul 2008 01:48:32 -0700 Subject: sysctl: allow override of /proc/sys/net with CAP_NET_ADMIN Extend the permission check for networking sysctl's to allow modification when current process has CAP_NET_ADMIN capability and is not root. This version uses the until now unused permissions hook to override the mode value for /proc/sys/net if accessed by a user with capabilities. Found while working with Quagga. It is impossible to turn forwarding on/off through the command interface because Quagga uses secure coding practice of dropping privileges during initialization and only raising via capabilities when necessary. Since the daemon has reset real/effective uid after initialization, all attempts to access /proc/sys/net variables will fail.
Signed-off-by: Stephen Hemminger Acked-by: "Eric W. Biederman" Cc: Chris Wright Cc: Alexey Dobriyan Cc: Andrew Morgan Cc: Pavel Emelyanov Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sysctl_net.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 007c1a6708e..63ada437fc2 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -35,8 +35,22 @@ net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) return &namespaces->net_ns->sysctl_table_headers; } +/* Return standard mode bits for table entry. */ +static int net_ctl_permissions(struct ctl_table_root *root, + struct nsproxy *nsproxy, + struct ctl_table *table) +{ + /* Allow network administrator to have same access as root. */ + if (capable(CAP_NET_ADMIN)) { + int mode = (table->mode >> 6) & 7; + return (mode << 6) | (mode << 3) | mode; + } + return table->mode; +} + static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, + .permissions = net_ctl_permissions, }; static LIST_HEAD(net_sysctl_ro_tables); -- cgit v1.2.3-70-g09d2 From 547b792cac0a038b9dbf958d3c120df3740b5572 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 25 Jul 2008 21:43:18 -0700 Subject: net: convert BUG_TRAP to generic WARN_ON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes legacy reinvent-the-wheel type thing. The generic machinery integrates much better to automated debugging aids such as kerneloops.org (and others), and is unambiguous due to better naming. Non-intuitively BUG_TRAP() is actually equal to WARN_ON() rather than BUG_ON() though some might actually be promoted to BUG_ON() but I left that to the future. I could make at least one BUILD_BUG_ON conversion. Signed-off-by: Ilpo Järvinen Signed-off-by: David S.
Miller --- include/net/request_sock.h | 5 +++-- net/appletalk/ddp.c | 4 ++-- net/core/datagram.c | 8 ++++---- net/core/dev.c | 10 +++++----- net/core/request_sock.c | 2 +- net/core/skbuff.c | 20 ++++++++++---------- net/core/stream.c | 6 +++--- net/core/user_dma.c | 5 ++--- net/dccp/dccp.h | 2 +- net/dccp/input.c | 2 +- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/dccp/proto.c | 4 ++-- net/dccp/timer.c | 2 +- net/ipv4/af_inet.c | 14 +++++++------- net/ipv4/devinet.c | 6 +++--- net/ipv4/inet_connection_sock.c | 18 +++++++++--------- net/ipv4/inet_fragment.c | 4 ++-- net/ipv4/inet_hashtables.c | 8 ++++---- net/ipv4/inet_timewait_sock.c | 2 +- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv4/tcp.c | 12 ++++++------ net/ipv4/tcp_input.c | 20 ++++++++++---------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_timer.c | 2 +- net/ipv6/addrconf.c | 11 +++++++---- net/ipv6/af_inet6.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/inet6_hashtables.c | 4 ++-- net/ipv6/ip6_fib.c | 31 ++++++++++++++++--------------- net/ipv6/ip6_output.c | 2 +- net/ipv6/mip6.c | 8 ++++---- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++-- net/ipv6/reassembly.c | 4 ++-- net/ipv6/tcp_ipv6.c | 2 +- net/key/af_key.c | 4 ++-- net/netlink/af_netlink.c | 7 ++++--- net/packet/af_packet.c | 4 ++-- net/rxrpc/af_rxrpc.c | 6 +++--- net/sched/act_api.c | 2 +- net/sched/act_police.c | 2 +- net/sched/cls_u32.c | 10 +++++----- net/sched/sch_cbq.c | 4 ++-- net/sched/sch_generic.c | 2 +- net/sched/sch_htb.c | 16 ++++++++-------- net/sctp/associola.c | 2 +- net/unix/af_unix.c | 8 ++++---- net/xfrm/xfrm_algo.c | 4 ++-- net/xfrm/xfrm_ipcomp.c | 3 +-- net/xfrm/xfrm_state.c | 2 +- 51 files changed, 159 insertions(+), 155 deletions(-) (limited to 'net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 0c96e7bed5d..8d6e991ef4d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ 
-170,7 +171,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue { struct request_sock *req = queue->rskq_accept_head; - BUG_TRAP(req != NULL); + WARN_ON(req == NULL); queue->rskq_accept_head = req->dl_next; if (queue->rskq_accept_head == NULL) @@ -185,7 +186,7 @@ static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queu struct request_sock *req = reqsk_queue_remove(queue); struct sock *child = req->sk; - BUG_TRAP(child != NULL); + WARN_ON(child == NULL); sk_acceptq_removed(parent); __reqsk_free(req); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 07b5b82c5ea..0c850427a85 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -959,7 +959,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -986,7 +986,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/core/datagram.c b/net/core/datagram.c index 8a28fc93b72..dd61dcad601 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -285,7 +285,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -315,7 +315,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -366,7 +366,7 @@ static int 
skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -402,7 +402,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (; list; list=list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/core/dev.c b/net/core/dev.c index ccf97f9f37e..c6f9c83745e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1973,7 +1973,7 @@ static void net_tx_action(struct softirq_action *h) struct sk_buff *skb = clist; clist = clist->next; - BUG_TRAP(!atomic_read(&skb->users)); + WARN_ON(atomic_read(&skb->users)); __kfree_skb(skb); } } @@ -3847,7 +3847,7 @@ static void rollback_registered(struct net_device *dev) dev->uninit(dev); /* Notifier chain MUST detach us from master device. 
*/ - BUG_TRAP(!dev->master); + WARN_ON(dev->master); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); @@ -4169,9 +4169,9 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(atomic_read(&dev->refcnt)); - BUG_TRAP(!dev->ip_ptr); - BUG_TRAP(!dev->ip6_ptr); - BUG_TRAP(!dev->dn_ptr); + WARN_ON(dev->ip_ptr); + WARN_ON(dev->ip6_ptr); + WARN_ON(dev->dn_ptr); if (dev->destructor) dev->destructor(dev); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 2d3035d3abd..7552495aff7 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -123,7 +123,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } } - BUG_TRAP(lopt->qlen == 0); + WARN_ON(lopt->qlen != 0); if (lopt_size > PAGE_SIZE) vfree(lopt); else diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e4115672b6c..4e0c9227418 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1200,7 +1200,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1229,7 +1229,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1475,7 +1475,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + frag->size; if ((copy = end - offset) > 0) { @@ -1503,7 +1503,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if 
((copy = end - offset) > 0) { @@ -1552,7 +1552,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1581,7 +1581,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1629,7 +1629,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1662,7 +1662,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, __wsum csum2; int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -2373,7 +2373,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -2397,7 +2397,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/core/stream.c b/net/core/stream.c index 4a0ad152c9c..a6b3437ff08 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -192,13 +192,13 @@ void sk_stream_kill_queues(struct sock *sk) __skb_queue_purge(&sk->sk_error_queue); /* Next, the write queue. 
*/ - BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); + WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ sk_mem_reclaim(sk); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); + WARN_ON(sk->sk_wmem_queued); + WARN_ON(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket diff --git a/net/core/user_dma.c b/net/core/user_dma.c index c77aff9c6eb..53c6b67b287 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -27,7 +27,6 @@ #include #include -#include /* for BUG_TRAP */ #include #include @@ -71,7 +70,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; copy = end - offset; @@ -100,7 +99,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; copy = end - offset; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 32617e0576c..743d85fcd65 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -164,7 +164,7 @@ static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) { s64 delta = dccp_delta_seqno(s1, s2); - BUG_TRAP(delta >= 0); + WARN_ON(delta < 0); return (u64)delta <= ndp + 1; } diff --git a/net/dccp/input.c b/net/dccp/input.c index 08392ed86c2..df2f110df94 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -413,7 +413,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); - BUG_TRAP(sk->sk_send_head != NULL); + WARN_ON(sk->sk_send_head == NULL); __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2622ace17c4..a835b88237c 100644 --- 
a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -283,7 +283,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != dccp_rsk(req)->dreq_iss) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b74e8b2cbe5..da509127e00 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -186,7 +186,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); if (seq != dccp_rsk(req)->dreq_iss) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a0b56009611..b622d974485 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -327,7 +327,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet_csk_delack_init(sk); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -981,7 +981,7 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 3608d5342ca..6a5b961b6f5 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -106,7 +106,7 @@ static void dccp_retransmit_timer(struct sock *sk) * -- Acks in client-PARTOPEN state (sec. 8.1.5) * -- CloseReq in server-CLOSEREQ state (sec. 8.3) * -- Close in node-CLOSING state (sec. 
8.3) */ - BUG_TRAP(sk->sk_send_head != NULL); + WARN_ON(sk->sk_send_head == NULL); /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dd919d84285..a107f49eea4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -148,10 +148,10 @@ void inet_sock_destruct(struct sock *sk) return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(sk->sk_wmem_queued); + WARN_ON(sk->sk_forward_alloc); kfree(inet->opt); dst_release(sk->sk_dst_cache); @@ -341,7 +341,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); @@ -661,8 +661,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); - BUG_TRAP((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); + WARN_ON(!((1 << sk2->sk_state) & + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); sock_graft(sk2, newsock); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2e667e2f90d..91d3d96805d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -138,8 +138,8 @@ void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(!idev->ifa_list); - BUG_TRAP(!idev->mc_list); + WARN_ON(idev->ifa_list); + WARN_ON(idev->mc_list); #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", idev, dev ? 
dev->name : "NIL"); @@ -399,7 +399,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) } ipv4_devconf_setall(in_dev); if (ifa->ifa_dev != in_dev) { - BUG_TRAP(!ifa->ifa_dev); + WARN_ON(ifa->ifa_dev); in_dev_hold(in_dev); ifa->ifa_dev = in_dev; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bb81c958b74..0c1ae68ee84 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -167,7 +167,7 @@ tb_not_found: success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); + WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); ret = 0; fail_unlock: @@ -260,7 +260,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) } newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); + WARN_ON(newsk->sk_state == TCP_SYN_RECV); out: release_sock(sk); return newsk; @@ -386,7 +386,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, ireq->rmt_addr == raddr && ireq->loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { - BUG_TRAP(!req->sk); + WARN_ON(req->sk); *prevp = prev; break; } @@ -539,14 +539,14 @@ EXPORT_SYMBOL_GPL(inet_csk_clone); */ void inet_csk_destroy_sock(struct sock *sk) { - BUG_TRAP(sk->sk_state == TCP_CLOSE); - BUG_TRAP(sock_flag(sk, SOCK_DEAD)); + WARN_ON(sk->sk_state != TCP_CLOSE); + WARN_ON(!sock_flag(sk, SOCK_DEAD)); /* It cannot be in hash table! 
*/ - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); + WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -629,7 +629,7 @@ void inet_csk_listen_stop(struct sock *sk) local_bh_disable(); bh_lock_sock(child); - BUG_TRAP(!sock_owned_by_user(child)); + WARN_ON(sock_owned_by_user(child)); sock_hold(child); sk->sk_prot->disconnect(child, O_NONBLOCK); @@ -647,7 +647,7 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); __reqsk_free(req); } - BUG_TRAP(!sk->sk_ack_backlog); + WARN_ON(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 0546a0bc97e..6c52e08f786 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -134,8 +134,8 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, struct sk_buff *fp; struct netns_frags *nf; - BUG_TRAP(q->last_in & INET_FRAG_COMPLETE); - BUG_TRAP(del_timer(&q->timer) == 0); + WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); + WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. 
*/ fp = q->fragments; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 115f53722d2..44981906fb9 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -305,7 +305,7 @@ unique: inet->num = lport; inet->sport = htons(lport); sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); @@ -342,7 +342,7 @@ void __inet_hash_nolisten(struct sock *sk) rwlock_t *lock; struct inet_ehash_bucket *head; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); sk->sk_hash = inet_sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); @@ -367,7 +367,7 @@ static void __inet_hash(struct sock *sk) return; } - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; @@ -450,7 +450,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->ib_net == net && tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); + WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; if (!check_established(death_row, sk, diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 75c2def8f9a..d985bd613d2 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -86,7 +86,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; - BUG_TRAP(icsk->icsk_bind_hash); + WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 38d38f05801..2152d222b95 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -488,8 +488,8 @@ static int ip_frag_reasm(struct ipq *qp, struct 
sk_buff *prev, qp->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG_CB(head)->offset != 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 465544f6281..d533a89e08d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -118,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_TRAP(newskb->dst); + WARN_ON(!newskb->dst); netif_rx(newskb); return 0; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0b491bf03db..1ab341e5d3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1096,7 +1096,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) #if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); + WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); #endif if (inet_csk_ack_scheduled(sk)) { @@ -1358,7 +1358,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - BUG_TRAP(flags & MSG_PEEK); + WARN_ON(!(flags & MSG_PEEK)); skb = skb->next; } while (skb != (struct sk_buff *)&sk->sk_receive_queue); @@ -1421,8 +1421,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.len = len; - BUG_TRAP(tp->copied_seq == tp->rcv_nxt || - (flags & (MSG_PEEK | MSG_TRUNC))); + WARN_ON(tp->copied_seq != tp->rcv_nxt && + !(flags & (MSG_PEEK | MSG_TRUNC))); /* Ugly... If prequeue is not empty, we have to * process it before releasing socket, otherwise @@ -1844,7 +1844,7 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? 
*/ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -1973,7 +1973,7 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 75efd244f2a..67ccce2a96b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1629,10 +1629,10 @@ advance_sp: out: #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); + WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif return flag; } @@ -2181,7 +2181,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; - BUG_TRAP(packets <= tp->packets_out); + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; cnt = tp->lost_cnt_hint; @@ -2610,7 +2610,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. 
*/ if (icsk->icsk_ca_state == TCP_CA_Open) { - BUG_TRAP(tp->retrans_out == 0); + WARN_ON(tp->retrans_out != 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { @@ -2972,9 +2972,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) } #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); if (!tp->packets_out && tcp_is_sack(tp)) { icsk = inet_csk(sk); if (tp->lost_out) { @@ -3877,7 +3877,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int i; /* RCV.NXT must cover all the block! */ - BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq)); + WARN_ON(before(tp->rcv_nxt, sp->end_seq)); /* Zap this SACK, by moving forward any other SACKS. */ for (i=this_sack+1; i < num_sacks; i++) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a82df630756..a2b06d0cc26 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -418,7 +418,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) /* ICMPs are not backlogged, hence we cannot get an established socket here. 
*/ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 328e0cf42b3..5ab6ba19c3c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -287,7 +287,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (!tp->packets_out) goto out; - BUG_TRAP(!tcp_write_queue_empty(sk)); + WARN_ON(tcp_write_queue_empty(sk)); if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 74d543d504a..a7842c54f58 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -313,8 +313,10 @@ static void in6_dev_finish_destroy_rcu(struct rcu_head *head) void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(idev->addr_list==NULL); - BUG_TRAP(idev->mc_list==NULL); + + WARN_ON(idev->addr_list != NULL); + WARN_ON(idev->mc_list != NULL); + #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? 
dev->name : "NIL"); #endif @@ -517,8 +519,9 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { - BUG_TRAP(ifp->if_next==NULL); - BUG_TRAP(ifp->lst_next==NULL); + WARN_ON(ifp->if_next != NULL); + WARN_ON(ifp->lst_next != NULL); + #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); #endif diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3d828bc4b1c..0843c4d6218 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -153,7 +153,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 87801cc1b2f..16d43f20b32 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -98,7 +98,7 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&treq->rmt_addr, raddr) && ipv6_addr_equal(&treq->loc_addr, laddr) && (!treq->iif || treq->iif == iif)) { - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); *prevp = prev; return req; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 00a8a5f9380..1646a565825 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -28,7 +28,7 @@ void __inet6_hash(struct sock *sk) struct hlist_head *list; rwlock_t *lock; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -202,7 +202,7 @@ unique: * in hash table socket with a funny identity. 
*/ inet->num = lport; inet->sport = htons(lport); - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sk->sk_hash = hash; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 08ea2de28d6..52dddc25d3e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -287,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) w->leaf = rt; return 1; } - BUG_TRAP(res!=0); + WARN_ON(res == 0); } w->leaf = NULL; return 0; @@ -778,7 +778,7 @@ out: pn->leaf = fib6_find_prefix(info->nl_net, pn); #if RT6_DEBUG >= 2 if (!pn->leaf) { - BUG_TRAP(pn->leaf != NULL); + WARN_ON(pn->leaf == NULL); pn->leaf = info->nl_net->ipv6.ip6_null_entry; } #endif @@ -942,7 +942,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { - BUG_TRAP(saddr!=NULL); + WARN_ON(saddr == NULL); if (fn && fn->subtree) fn = fib6_locate_1(fn->subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src)); @@ -996,9 +996,9 @@ static struct fib6_node *fib6_repair_tree(struct net *net, RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; - BUG_TRAP(!(fn->fn_flags&RTN_RTINFO)); - BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT)); - BUG_TRAP(fn->leaf==NULL); + WARN_ON(fn->fn_flags & RTN_RTINFO); + WARN_ON(fn->fn_flags & RTN_TL_ROOT); + WARN_ON(fn->leaf != NULL); children = 0; child = NULL; @@ -1014,7 +1014,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, fn->leaf = fib6_find_prefix(net, fn); #if RT6_DEBUG >= 2 if (fn->leaf==NULL) { - BUG_TRAP(fn->leaf); + WARN_ON(!fn->leaf); fn->leaf = net->ipv6.ip6_null_entry; } #endif @@ -1025,16 +1025,17 @@ static struct fib6_node *fib6_repair_tree(struct net *net, pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { - BUG_TRAP(fn->fn_flags&RTN_ROOT); + WARN_ON(!(fn->fn_flags & RTN_ROOT)); FIB6_SUBTREE(pn) = NULL; nstate = FWS_L; } else { - BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); + 
WARN_ON(fn->fn_flags & RTN_ROOT); #endif if (pn->right == fn) pn->right = child; else if (pn->left == fn) pn->left = child; #if RT6_DEBUG >= 2 - else BUG_TRAP(0); + else + WARN_ON(1); #endif if (child) child->parent = pn; @@ -1154,14 +1155,14 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) #if RT6_DEBUG >= 2 if (rt->u.dst.obsolete>0) { - BUG_TRAP(fn==NULL); + WARN_ON(fn != NULL); return -ENOENT; } #endif if (fn == NULL || rt == net->ipv6.ip6_null_entry) return -ENOENT; - BUG_TRAP(fn->fn_flags&RTN_RTINFO); + WARN_ON(!(fn->fn_flags & RTN_RTINFO)); if (!(rt->rt6i_flags&RTF_CACHE)) { struct fib6_node *pn = fn; @@ -1266,7 +1267,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->node = pn; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { - BUG_TRAP(fn->fn_flags&RTN_ROOT); + WARN_ON(!(fn->fn_flags & RTN_ROOT)); w->state = FWS_L; continue; } @@ -1281,7 +1282,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) continue; } #if RT6_DEBUG >= 2 - BUG_TRAP(0); + WARN_ON(1); #endif } } @@ -1323,7 +1324,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) } return 0; } - BUG_TRAP(res==0); + WARN_ON(res != 0); } w->leaf = rt; return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6407c64ea4a..6811901e6b1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,7 +116,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_TRAP(newskb->dst); + WARN_ON(!newskb->dst); netif_rx(newskb); return 0; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index ad1cc5bbf97..31295c8f619 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -164,8 +164,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) calc_padlen(sizeof(*dstopt), 6)); hao->type = IPV6_TLV_HAO; + BUILD_BUG_ON(sizeof(*hao) != 18); hao->length = sizeof(*hao) - 2; - BUG_TRAP(hao->length == 16); 
len = ((char *)hao - (char *)dstopt) + sizeof(*hao); @@ -174,7 +174,7 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); spin_unlock_bh(&x->lock); - BUG_TRAP(len == x->props.header_len); + WARN_ON(len != x->props.header_len); dstopt->hdrlen = (x->props.header_len >> 3) - 1; return 0; @@ -317,7 +317,7 @@ static int mip6_destopt_init_state(struct xfrm_state *x) x->props.header_len = sizeof(struct ipv6_destopt_hdr) + calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + sizeof(struct ipv6_destopt_hao); - BUG_TRAP(x->props.header_len == 24); + WARN_ON(x->props.header_len != 24); return 0; } @@ -380,7 +380,7 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) rt2->rt_hdr.segments_left = 1; memset(&rt2->reserved, 0, sizeof(rt2->reserved)); - BUG_TRAP(rt2->rt_hdr.hdrlen == 2); + WARN_ON(rt2->rt_hdr.hdrlen != 2); memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); spin_lock_bh(&x->lock); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index cf20bc4fd60..52d06dd4b81 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -416,8 +416,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) fq_kill(fq); - BUG_TRAP(head != NULL); - BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); /* Unfragmented part is taken from the first segment. 
*/ payload_len = ((head->data - skb_network_header(head)) - diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 6ab957ec2dd..89184b576e2 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -473,8 +473,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, fq->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG6_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG6_CB(head)->offset != 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ae45f983501..cff778b23a7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -421,7 +421,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); diff --git a/net/key/af_key.c b/net/key/af_key.c index f0fc46c8038..d628df97e02 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -96,8 +96,8 @@ static void pfkey_sock_destruct(struct sock *sk) return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); atomic_dec(&pfkey_socks_nr); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 98bfe277eab..b0eacc0007c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -158,9 +158,10 @@ static void netlink_sock_destruct(struct sock *sk) printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!nlk_sk(sk)->groups); + + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + 
WARN_ON(nlk_sk(sk)->groups); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d56cae112dc..c718e7e3f7d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -260,8 +260,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) static void packet_sock_destruct(struct sock *sk) { - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive packet socket: %p\n", sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 4b2682feeed..32e489118be 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -660,9 +660,9 @@ static void rxrpc_sock_destructor(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(sk_unhashed(sk)); - BUG_TRAP(!sk->sk_socket); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive rxrpc socket: %p\n", sk); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 74e662cbb2c..d308c19aa3f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -41,7 +41,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) return; } } - BUG_TRAP(0); + WARN_ON(1); } EXPORT_SYMBOL(tcf_hash_destroy); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 32c3f9d9fb7..38015b49394 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -116,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p) return; } } - BUG_TRAP(0); + WARN_ON(1); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 527db2559dd..246f9065ce3 100644 --- a/net/sched/cls_u32.c +++ 
b/net/sched/cls_u32.c @@ -345,7 +345,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) } } } - BUG_TRAP(0); + WARN_ON(1); return 0; } @@ -368,7 +368,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) struct tc_u_common *tp_c = tp->data; struct tc_u_hnode **hn; - BUG_TRAP(!ht->refcnt); + WARN_ON(ht->refcnt); u32_clear_hnode(tp, ht); @@ -380,7 +380,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) } } - BUG_TRAP(0); + WARN_ON(1); return -ENOENT; } @@ -389,7 +389,7 @@ static void u32_destroy(struct tcf_proto *tp) struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); - BUG_TRAP(root_ht != NULL); + WARN_ON(root_ht == NULL); if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); @@ -407,7 +407,7 @@ static void u32_destroy(struct tcf_proto *tp) while ((ht = tp_c->hlist) != NULL) { tp_c->hlist = ht->next; - BUG_TRAP(ht->refcnt == 0); + WARN_ON(ht->refcnt != 0); kfree(ht); } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index f1d2f8ec8b4..14954bf4a68 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1175,7 +1175,7 @@ static void cbq_unlink_class(struct cbq_class *this) this->tparent->children = NULL; } } else { - BUG_TRAP(this->sibling == this); + WARN_ON(this->sibling != this); } } @@ -1699,7 +1699,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(sch); - BUG_TRAP(!cl->filters); + WARN_ON(cl->filters); tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->q); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 43abd4d27ea..fd2a6cadb11 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -746,5 +746,5 @@ void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); - BUG_TRAP(!timer_pending(&dev->watchdog_timer)); + 
WARN_ON(timer_pending(&dev->watchdog_timer)); } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 30c999c61b0..75a40951c4f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -524,7 +524,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) */ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { - BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); + WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen); if (!cl->prio_activity) { cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); @@ -542,7 +542,7 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - BUG_TRAP(cl->prio_activity); + WARN_ON(!cl->prio_activity); htb_deactivate_prios(q, cl); cl->prio_activity = 0; @@ -757,7 +757,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_TRAP(tree->rb_node); + WARN_ON(!tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; @@ -777,7 +777,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - BUG_TRAP(*sp->pptr); + WARN_ON(!*sp->pptr); if (!*sp->pptr) return NULL; htb_next_rb_node(sp->pptr); @@ -792,7 +792,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, sp->pid = cl->un.inner.last_ptr_id + prio; } } - BUG_TRAP(0); + WARN_ON(1); return NULL; } @@ -810,7 +810,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, do { next: - BUG_TRAP(cl); + WARN_ON(!cl); if (!cl) return NULL; @@ -1185,7 +1185,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, { struct htb_class *parent = cl->parent; - BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity); + WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); if 
(parent->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); @@ -1205,7 +1205,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { - BUG_TRAP(cl->un.leaf.q); + WARN_ON(!cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } gen_kill_estimator(&cl->bstats, &cl->rate_est); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ec2a0a33fd7..8472b8b349c 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -464,7 +464,7 @@ static void sctp_association_destroy(struct sctp_association *asoc) spin_unlock_bh(&sctp_assocs_id_lock); } - BUG_TRAP(!atomic_read(&asoc->rmem_alloc)); + WARN_ON(atomic_read(&asoc->rmem_alloc)); if (asoc->base.malloced) { kfree(asoc); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 70ceb1604ad..24eb214581d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -227,7 +227,7 @@ static void __unix_remove_socket(struct sock *sk) static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) { - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); sk_add_node(sk, list); } @@ -350,9 +350,9 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(sk_unhashed(sk)); - BUG_TRAP(!sk->sk_socket); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive unix socket: %p\n", sk); return; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 23a2cc04b8c..96036cf2216 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -718,7 +718,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + 
skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -748,7 +748,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 800f669083f..c609a4b98e1 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -251,7 +250,7 @@ static void ipcomp_free_tfms(struct crypto_comp **tfms) break; } - BUG_TRAP(pos); + WARN_ON(!pos); if (--pos->users) return; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 72fddafd891..4c6914ef7d9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -538,7 +538,7 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_lock); list_del(&x->all); -- cgit v1.2.3-70-g09d2 From 16df845f4566bc252f3e09db12f5c2f22cb44226 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 26 Jul 2008 02:21:54 -0700 Subject: syncookies: Make sure ECN is disabled ecn_ok is not initialized when a connection is established by cookies. The cookie syn-ack never sets ECN, so ecn_ok must be set to 0. Spotted using ns-3/network simulation cradle simulator and valgrind. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/ipv4/syncookies.c | 1 + net/ipv6/syncookies.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 51bc24d3b8a..9d38005abba 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -299,6 +299,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ecn_ok = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 6a68eeb7bbf..a46badd1082 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -223,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; + ireq->ecn_ok = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; -- cgit v1.2.3-70-g09d2 From cdec7e50a4896c5197d5575d9ca635eea6825149 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Jul 2008 02:28:09 -0700 Subject: Revert "pkt_sched: sch_sfq: dump a real number of flows" This reverts commit f867e6af94239a04ec23aeec2fcda5aa58e41db7. Based upon discussions between Jarek and Patrick McHardy, the field being set is more a config parameter than a statistic. And we should add a true statistic to provide this information if we really want it. Signed-off-by: David S. 
Miller --- net/sched/sch_sfq.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 73f53844ce9..8589da66656 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -536,14 +536,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.limit = q->limit; opt.divisor = SFQ_HASH_DIVISOR; - opt.flows = 0; - if (q->tail != SFQ_DEPTH) { - unsigned int i; - - for (i = 0; i < SFQ_HASH_DIVISOR; i++) - if (q->ht[i] != SFQ_DEPTH) - opt.flows++; - } + opt.flows = q->limit; NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); -- cgit v1.2.3-70-g09d2 From 59435444a13ed52d3444c5df26b73d3086bcd57b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 26 Jul 2008 11:59:09 +0100 Subject: dccp: Allow to distinguish original and retransmitted packets This patch allows the sender to distinguish original and retransmitted packets, which is in particular needed for the retransmission of DCCP-Requests: * the first Request uses ISS (generated in net/dccp/ip*.c), and sets GSS = ISS; * all retransmitted Requests use GSS' = GSS + 1, so that the n-th retransmitted Request has sequence number ISS + n (mod 48). To add generic support, the patch reorganises existing code so that: * icsk_retransmits == 0 for the original packet and * icsk_retransmits = n > 0 for the n-th retransmitted packet at the time dccp_transmit_skb() is called, via dccp_retransmit_skb(). Thanks to Wei Yongjun for pointing this problem out. Further changes: ---------------- * removed the `skb' argument from dccp_retransmit_skb(), since sk_send_head is used for all retransmissions (the exception is client-Acks in PARTOPEN state, but these do not use sk_send_head); * since sk_send_head always contains the original skb (via dccp_entail()), skb_cloned() never evaluated to true and thus pskb_copy() was never used. 
Signed-off-by: Gerrit Renker --- net/dccp/dccp.h | 2 +- net/dccp/output.c | 20 ++++++++++++++++---- net/dccp/timer.c | 20 ++++---------------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 743d85fcd65..1c2e3ec2eb5 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -226,7 +226,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); -extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); +extern int dccp_retransmit_skb(struct sock *sk); extern void dccp_send_ack(struct sock *sk); extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); diff --git a/net/dccp/output.c b/net/dccp/output.c index fe20068c5d8..d19d4819501 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -284,14 +284,26 @@ void dccp_write_xmit(struct sock *sk, int block) } } -int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +/** + * dccp_retransmit_skb - Retransmit Request, Close, or CloseReq packets + * There are only four retransmittable packet types in DCCP: + * - Request in client-REQUEST state (sec. 8.1.1), + * - CloseReq in server-CLOSEREQ state (sec. 8.3), + * - Close in node-CLOSING state (sec. 8.3), + * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()). + * This function expects sk->sk_send_head to contain the original skb. + */ +int dccp_retransmit_skb(struct sock *sk) { + WARN_ON(sk->sk_send_head == NULL); + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) return -EHOSTUNREACH; /* Routing failure or similar. */ - return dccp_transmit_skb(sk, (skb_cloned(skb) ? 
- pskb_copy(skb, GFP_ATOMIC): - skb_clone(skb, GFP_ATOMIC))); + /* this count is used to distinguish original and retransmitted skb */ + inet_csk(sk)->icsk_retransmits++; + + return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC)); } struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 6a5b961b6f5..54b3c7e9e01 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -98,22 +98,12 @@ static void dccp_retransmit_timer(struct sock *sk) goto backoff; } - /* - * sk->sk_send_head has to have one skb with - * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types. The only packets eligible for retransmission are: - * -- Requests in client-REQUEST state (sec. 8.1.1) - * -- Acks in client-PARTOPEN state (sec. 8.1.5) - * -- CloseReq in server-CLOSEREQ state (sec. 8.3) - * -- Close in node-CLOSING state (sec. 8.3) */ - WARN_ON(sk->sk_send_head == NULL); - /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. */ if (dccp_write_timeout(sk)) - goto out; + return; /* * We want to know the number of packets retransmitted, not the @@ -122,30 +112,28 @@ static void dccp_retransmit_timer(struct sock *sk) if (icsk->icsk_retransmits == 0) DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); - if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { + if (dccp_retransmit_skb(sk) != 0) { /* * Retransmission failed because of local congestion, * do not backoff. 
*/ - if (icsk->icsk_retransmits == 0) + if (--icsk->icsk_retransmits == 0) icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), DCCP_RTO_MAX); - goto out; + return; } backoff: icsk->icsk_backoff++; - icsk->icsk_retransmits++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); if (icsk->icsk_retransmits > sysctl_dccp_retries1) __sk_dst_reset(sk); -out:; } static void dccp_write_timer(unsigned long data) -- cgit v1.2.3-70-g09d2 From 73f18fdbca3f92b90aeaee16f5175fe30496e218 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: dccp: Bug-Fix - AWL was never updated The AWL lower Ack validity window advances in proportion to GSS, the greatest sequence number sent. Updating AWL other than at connection setup (in the DCCP-Request sent by dccp_v{4,6}_connect()) was missing in the DCCP code. This bug led to syslog messages such as "kernel: dccp_check_seqno: DCCP: Step 6 failed for DATAACK packet, [...] P.ackno exists or LAWL(82947089) <= P.ackno(82948208) <= S.AWH(82948728), sending SYNC..." The difference between AWL/AWH here is 1639 packets, while the expected value (the Sequence Window) would have been 100 (the default). A closer look showed that LAWL = AWL = 82947089 equalled the ISS on the Response. The patch now updates AWL with each increase of GSS. Further changes: ---------------- The patch also enforces more stringent checks on the ISS sequence number: * AWL is initialised to ISS at connection setup and remains at this value; * AWH is then always set to GSS (via dccp_update_gss()); * so on the first Request: AWL = AWH = ISS, and on the n-th Request: AWL = ISS, AWH = ISS + n. As a consequence, only Response packets that refer to Requests sent by this host will pass, all others are discarded. 
This is the intention and in effect implements the initial adjustments for AWL as specified in RFC 4340, 7.5.1. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/output.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/dccp/output.c b/net/dccp/output.c index d19d4819501..d06945c7d3d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -53,8 +53,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; - - dccp_inc_seqno(&dp->dccps_gss); + /* + * Increment GSS here already in case the option code needs it. + * Update GSS for real only if option processing below succeeds. + */ + dcb->dccpd_seq = ADD48(dp->dccps_gss, 1); switch (dcb->dccpd_type) { case DCCP_PKT_DATA: @@ -66,6 +69,9 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: set_ack = 0; + /* Use ISS on the first (non-retransmitted) Request. */ + if (icsk->icsk_retransmits == 0) + dcb->dccpd_seq = dp->dccps_iss; /* fall through */ case DCCP_PKT_SYNC: @@ -84,8 +90,6 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - dcb->dccpd_seq = dp->dccps_gss; - if (dccp_insert_options(sk, skb)) { kfree_skb(skb); return -EPROTO; @@ -103,7 +107,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; - dp->dccps_awh = dp->dccps_gss; + dccp_update_gss(sk, dcb->dccpd_seq); dccp_hdr_set_seq(dh, dp->dccps_gss); if (set_ack) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); @@ -112,6 +116,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = dp->dccps_service; + /* + * Limit Ack window to ISS <= P.ackno <= GSS, so that + * only Responses to Requests we sent are considered. 
+ */ + dp->dccps_awl = dp->dccps_iss; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -449,19 +458,7 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* - * SWL and AWL are initially adjusted so that they are not less than - * the initial Sequence Numbers received and sent, respectively: - * SWL := max(GSR + 1 - floor(W/4), ISR), - * AWL := max(GSS - W' + 1, ISS). - * These adjustments MUST be applied only at the beginning of the - * connection. - */ - dccp_update_gss(sk, dp->dccps_iss); - dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); - - /* S.GAR - greatest valid acknowledgement number received on a non-Sync; - * initialized to S.ISS (sec. 8.5) */ + /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ dp->dccps_gar = dp->dccps_iss; icsk->icsk_retransmits = 0; -- cgit v1.2.3-70-g09d2 From d68f0866f76e2bc4ddc07e88e2cb1bc8959a6d7e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: dccp: Fix sequence number check for ICMPv4 packets The payload of an ICMP message is a part of a packet we sent ourselves, so the sequence number check must use AWL and AWH, not SWL and SWH. 
For example: Endpoint A Endpoint B DATA-ACK --------> (SEQ=X) <-------- ICMP (Fragmentation Needed) (SEQ=X) Signed-off-by: Wei Yongjun Acked-by: Gerrit Renker --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a835b88237c..6a2f1879e18 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -238,7 +238,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && - !between48(seq, dp->dccps_swl, dp->dccps_swh)) { + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } -- cgit v1.2.3-70-g09d2 From e0bcfb0c6a6ed9ebd68746b306298dc5797fd426 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: dccp: Add check for sequence number in ICMPv6 message This adds a sequence number check for ICMPv6 DCCP error packets, in the same manner as it has been done for ICMPv4 in the previous patch. 
Signed-off-by: Wei Yongjun Acked-by: Gerrit Renker --- net/dccp/ipv6.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index da509127e00..25826b1bf68 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -89,6 +89,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; @@ -116,6 +117,14 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == DCCP_CLOSED) goto out; + dp = dccp_sk(sk); + seq = dccp_hdr_seq(dh); + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { @@ -168,7 +177,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_err_convert(type, code, &err); - seq = dccp_hdr_seq(dh); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; -- cgit v1.2.3-70-g09d2 From 18e1d836002ad970f42736bad09b7be9cfe99545 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: dccp: Fix incorrect length check for ICMPv4 packets Unlike TCP, which only needs 8 octets of original packet data, DCCP requires minimally 12 or 16 bytes for ICMP-payload sequence number checks. This patch replaces the insufficient length constant of 8 with a two-stage test, making sure that 12 bytes are available, before computing the basic header length required for sequence number checks. 
Signed-off-by: Wei Yongjun Signed-off-by: Gerrit Renker --- net/dccp/ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6a2f1879e18..882c5c4de69 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -196,8 +196,8 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + - (iph->ihl << 2)); + const u8 offset = iph->ihl << 2; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -207,7 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } -- cgit v1.2.3-70-g09d2 From 860239c56bbc7c830bdbcec93b140f22a5a5219b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:11 +0100 Subject: dccp: Add check for truncated ICMPv6 DCCP error packets This patch adds a minimum-length check for ICMPv6 packets, as per the previous patch for ICMPv4 payloads. 
Signed-off-by: Wei Yongjun Signed-off-by: Gerrit Renker --- net/dccp/ipv6.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 25826b1bf68..5e1ee0da2c4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -96,6 +96,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + return; + } + sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); -- cgit v1.2.3-70-g09d2 From 0bc3cc03fa6e1c20aecb5a33356bcaae410640b9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 24 Jul 2008 18:21:31 -0700 Subject: cpumask: change cpumask_of_cpu_ptr to use new cpumask_of_cpu * Replace previous instances of the cpumask_of_cpu_ptr* macros with the new (lvalue capable) generic cpumask_of_cpu().
Signed-off-by: Mike Travis Cc: Andrew Morton Cc: Jack Steiner Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/cstate.c | 3 +-- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 10 +++------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 +++++---------- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 12 ++++-------- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 3 +-- arch/x86/kernel/cpu/intel_cacheinfo.c | 3 +-- arch/x86/kernel/ldt.c | 6 ++---- arch/x86/kernel/microcode.c | 17 +++++------------ arch/x86/kernel/reboot.c | 11 +++-------- drivers/acpi/processor_throttling.c | 11 +++-------- drivers/firmware/dcdbas.c | 3 +-- drivers/misc/sgi-xp/xpc_main.c | 3 +-- kernel/stop_machine.c | 3 +-- kernel/time/tick-common.c | 8 +++----- kernel/trace/trace_sysprof.c | 4 +--- lib/smp_processor_id.c | 5 +---- net/sunrpc/svc.c | 3 +-- 17 files changed, 37 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 9220cf46aa1..c2502eb9aa8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); int retval; unsigned int eax, ebx, ecx, edx; unsigned int edx_part; @@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, new_mask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) return -1; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ff2fff56f0a..dd097b83583 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { cpumask_t 
saved_mask = current->cpus_allowed; - cpumask_of_cpu_ptr_declare(cpu_mask); unsigned int i; for_each_cpu_mask_nr(i, cmd->mask) { - cpumask_of_cpu_ptr_next(cpu_mask, i); - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); do_drv_write(cmd); } @@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu) } aperf_cur, mperf_cur; cpumask_t saved_mask; - cpumask_of_cpu_ptr(cpu_mask, cpu); unsigned int perf_percent; unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - cpumask_of_cpu_ptr(cpu_mask, cpu); struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) } cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpu_mask), data); + freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 53c7b693697..c45ca6d4dce 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -479,12 +479,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, cpu); u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -1017,7 +1016,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, pol->cpu); struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1032,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1107,7 +1105,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; - cpumask_of_cpu_ptr_declare(newmask); int rc; if (!cpu_online(pol->cpu)) @@ -1159,8 +1156,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr_next(newmask, pol->cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if 
(smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1182,7 +1178,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) - pol->cpus = *newmask; + pol->cpus = cpumask_of_cpu(pol->cpu); else pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); @@ -1248,7 +1244,6 @@ static unsigned int powernowk8_get (unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int khz = 0; unsigned int first; @@ -1258,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index ca2ac13b7af..15e13c01cc3 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu) unsigned l, h; unsigned clock_freq; cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) return 0; @@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. 
*/ - if (!cpus_empty(*covered_cpus)) { - cpumask_of_cpu_ptr_declare(new_mask); - + if (!cpus_empty(*covered_cpus)) for_each_cpu_mask_nr(j, *covered_cpus) { - cpumask_of_cpu_ptr_next(new_mask, j); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, + &cpumask_of_cpu(j)); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } - } tmp = freqs.new; freqs.new = freqs.old; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2f3728dc24f..191f7263c61 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) static unsigned int speedstep_get(unsigned int cpu) { - cpumask_of_cpu_ptr(newmask, cpu); - return _speedstep_get(newmask); + return _speedstep_get(&cpumask_of_cpu(cpu)); } /** diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 650d40f7912..6b0a10b002f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) unsigned long j; int retval; cpumask_t oldmask; - cpumask_of_cpu_ptr(newmask, cpu); if (num_cache_leaves == 0) return -ENOENT; @@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, newmask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) goto out; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa50f3..b68e21f06f4 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP - cpumask_of_cpu_ptr_declare(mask); - preempt_disable(); load_LDT(pc); - cpumask_of_cpu_ptr_next(mask, smp_processor_id()); - if 
(!cpus_equal(current->mm->cpu_vm_mask, *mask)) + if (!cpus_equal(current->mm->cpu_vm_mask, + cpumask_of_cpu(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c751590..652fa5c38eb 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -388,7 +388,6 @@ static int do_microcode_update (void) void *new_mc = NULL; int cpu; cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); old = current->cpus_allowed; @@ -405,8 +404,7 @@ static int do_microcode_update (void) if (!uci->valid) continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); error = get_maching_microcode(new_mc, cpu); if (error < 0) goto out; @@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu) struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int val[2]; int err = 0; @@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu) static void microcode_init_cpu(int cpu, int resume) { cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(&microcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) @@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev, if (end == buf) return -EINVAL; if (val == 1) { - cpumask_t old;
- cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; + cpumask_t old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(&microcode_mutex); if (uci->valid) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 06a9f643817..724adfc63cb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -414,25 +414,20 @@ void native_machine_shutdown(void) /* The boot cpu is always logical cpu 0 */ int reboot_cpu_id = 0; - cpumask_of_cpu_ptr(newmask, reboot_cpu_id); #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_online(reboot_cpu)) { + cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } #endif /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) { + if (!cpu_online(reboot_cpu_id)) reboot_cpu_id = smp_processor_id(); - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index a2c3f9cfa54..a56fc6c4394 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -827,7 +827,6 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) static int acpi_processor_get_throttling(struct acpi_processor *pr) { cpumask_t saved_mask; - cpumask_of_cpu_ptr_declare(new_mask); int ret; if (!pr) @@ -839,8 +838,7 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr) * Migrate task to the cpu pointed by pr.
*/ saved_mask = current->cpus_allowed; - cpumask_of_cpu_ptr_next(new_mask, pr->id); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); ret = pr->throttling.acpi_processor_get_throttling(pr); /* restore the previous state */ set_cpus_allowed_ptr(current, &saved_mask); @@ -989,7 +987,6 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { cpumask_t saved_mask; - cpumask_of_cpu_ptr_declare(new_mask); int ret = 0; unsigned int i; struct acpi_processor *match_pr; @@ -1028,8 +1025,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it can be called only for the cpu pointed by pr. */ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - cpumask_of_cpu_ptr_next(new_mask, pr->id); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); ret = p_throttling->acpi_processor_set_throttling(pr, t_state.target_state); } else { @@ -1060,8 +1056,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) continue; } t_state.cpu = i; - cpumask_of_cpu_ptr_next(new_mask, i); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); ret = match_pr->throttling. 
acpi_processor_set_throttling( match_pr, t_state.target_state); diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index c66817e7717..50a071f1c94 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -245,7 +245,6 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, static int smi_request(struct smi_cmd *smi_cmd) { cpumask_t old_mask; - cpumask_of_cpu_ptr(new_mask, 0); int ret = 0; if (smi_cmd->magic != SMI_CMD_MAGIC) { @@ -256,7 +255,7 @@ static int smi_request(struct smi_cmd *smi_cmd) /* SMI requires CPU 0 */ old_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); if (smp_processor_id() != 0) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", __func__); diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 579b01ff82d..c3b4227f48a 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -229,11 +229,10 @@ xpc_hb_checker(void *ignore) int last_IRQ_count = 0; int new_IRQ_count; int force_IRQ = 0; - cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU); /* this thread was marked active by xpc_hb_init() */ - set_cpus_allowed_ptr(current, cpumask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU)); /* set our heartbeating to other partitions into motion */ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 738b411ff2d..ba9b2054ecb 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -33,9 +33,8 @@ static int stopmachine(void *cpu) { int irqs_disabled = 0; int prepared = 0; - cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); - set_cpus_allowed_ptr(current, cpumask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu)); /* Ack: we are alive */ smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. 
*/ diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index bf43284d685..80c4336f418 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -196,12 +196,10 @@ static int tick_check_new_device(struct clock_event_device *newdev) struct tick_device *td; int cpu, ret = NOTIFY_OK; unsigned long flags; - cpumask_of_cpu_ptr_declare(cpumask); spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); - cpumask_of_cpu_ptr_next(cpumask, cpu); if (!cpu_isset(cpu, newdev->cpumask)) goto out_bc; @@ -209,7 +207,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) curdev = td->evtdev; /* cpu local device ? */ - if (!cpus_equal(newdev->cpumask, *cpumask)) { + if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) { /* * If the cpu affinity of the device interrupt can not @@ -222,7 +220,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) * If we have a cpu local device already, do not replace it * by a non cpu local device */ - if (curdev && cpus_equal(curdev->cpumask, *cpumask)) + if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu))) goto out_bc; } @@ -254,7 +252,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) curdev = NULL; } clockevents_exchange_device(curdev, newdev); - tick_setup_device(td, newdev, cpu, cpumask); + tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu)); if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify(); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index ce2d723c10e..bb948e52ce2 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -213,9 +213,7 @@ static void start_stack_timers(void) int cpu; for_each_online_cpu(cpu) { - cpumask_of_cpu_ptr(new_mask, cpu); - - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); start_stack_timer(cpu); } set_cpus_allowed_ptr(current, &saved_mask); diff --git a/lib/smp_processor_id.c 
b/lib/smp_processor_id.c index c4381d9516f..0f8fc22ed10 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -11,7 +11,6 @@ notrace unsigned int debug_smp_processor_id(void) { unsigned long preempt_count = preempt_count(); int this_cpu = raw_smp_processor_id(); - cpumask_of_cpu_ptr_declare(this_mask); if (likely(preempt_count)) goto out; @@ -23,9 +22,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - cpumask_of_cpu_ptr_next(this_mask, this_cpu); - - if (cpus_equal(current->cpus_allowed, *this_mask)) + if (cpus_equal(current->cpus_allowed, cpumask_of_cpu(this_cpu))) goto out; /* diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 835d2741308..5a32cb7c4bb 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -310,8 +310,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) switch (m->mode) { case SVC_POOL_PERCPU: { - cpumask_of_cpu_ptr(cpumask, node); - set_cpus_allowed_ptr(task, cpumask); + set_cpus_allowed_ptr(task, &cpumask_of_cpu(node)); break; } case SVC_POOL_PERNODE: -- cgit v1.2.3-70-g09d2 From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Jul 2008 19:44:49 -0700 Subject: dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. 
If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. 
[akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Andi Kleen Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-API.txt | 4 +- arch/arm/common/dmabounce.c | 2 +- arch/ia64/hp/common/hwsw_iommu.c | 5 +- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/sn/pci/pci_dma.c | 2 +- arch/mips/mm/dma-default.c | 2 +- arch/powerpc/platforms/cell/celleb_scc_pciex.c | 2 +- arch/powerpc/platforms/cell/spider-pci.c | 2 +- arch/powerpc/platforms/iseries/mf.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-dma.c | 27 ++++--- arch/x86/kernel/pci-gart_64.c | 3 +- arch/x86/kernel/pci-nommu.c | 14 +--- arch/x86/kernel/pci-swiotlb_64.c | 2 +- drivers/firewire/fw-iso.c | 2 +- drivers/firewire/fw-ohci.c | 2 +- drivers/firewire/fw-sbp2.c | 8 +-- drivers/infiniband/hw/ipath/ipath_sdma.c | 2 +- drivers/infiniband/hw/ipath/ipath_user_sdma.c | 6 +- drivers/infiniband/hw/mthca/mthca_eq.c | 2 +- drivers/media/dvb/pluto2/pluto2.c | 2 +- drivers/mmc/host/sdhci.c | 4 +- drivers/net/arm/ep93xx_eth.c | 4 +- drivers/net/bnx2x_main.c | 4 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/e100.c | 2 +- drivers/net/e1000e/ethtool.c | 4 +- drivers/net/e1000e/netdev.c | 11 +-- drivers/net/ibmveth.c | 38 +++++----- drivers/net/iseries_veth.c | 4 +- drivers/net/mlx4/eq.c | 2 +- drivers/net/pasemi_mac.c | 6 +- drivers/net/qla3xxx.c | 12 ++-- drivers/net/s2io.c | 48 +++++++------ drivers/net/sfc/rx.c | 4 +- drivers/net/sfc/tx.c | 7 +- drivers/net/spider_net.c | 4 +- drivers/net/tc35815.c | 4 +- 
drivers/net/wireless/ath5k/base.c | 4 +- drivers/scsi/ibmvscsi/ibmvfc.c | 4 +- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 +- drivers/scsi/ibmvscsi/ibmvstgt.c | 2 +- drivers/scsi/ibmvscsi/rpa_vscsi.c | 2 +- drivers/spi/atmel_spi.c | 4 +- drivers/spi/au1550_spi.c | 6 +- drivers/spi/omap2_mcspi.c | 4 +- drivers/spi/pxa2xx_spi.c | 4 +- drivers/spi/spi_imx.c | 6 +- include/asm-alpha/dma-mapping.h | 6 +- include/asm-alpha/pci.h | 2 +- include/asm-arm/dma-mapping.h | 2 +- include/asm-avr32/dma-mapping.h | 2 +- include/asm-cris/dma-mapping.h | 2 +- include/asm-frv/dma-mapping.h | 2 +- include/asm-generic/dma-mapping-broken.h | 2 +- include/asm-generic/dma-mapping.h | 4 +- include/asm-generic/pci-dma-compat.h | 4 +- include/asm-ia64/machvec.h | 2 +- include/asm-m68k/dma-mapping.h | 2 +- include/asm-mips/dma-mapping.h | 2 +- include/asm-mn10300/dma-mapping.h | 2 +- include/asm-parisc/dma-mapping.h | 2 +- include/asm-powerpc/dma-mapping.h | 2 +- include/asm-sh/dma-mapping.h | 2 +- include/asm-sparc/dma-mapping_64.h | 2 +- include/asm-sparc/pci_32.h | 3 +- include/asm-sparc/pci_64.h | 5 +- include/asm-x86/device.h | 3 + include/asm-x86/dma-mapping.h | 99 ++++++++++++++++++-------- include/asm-x86/swiotlb.h | 2 +- include/asm-xtensa/dma-mapping.h | 2 +- include/linux/i2o.h | 2 +- include/linux/ssb/ssb.h | 4 +- include/rdma/ib_verbs.h | 2 +- lib/swiotlb.c | 4 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 +- 76 files changed, 256 insertions(+), 210 deletions(-) (limited to 'net') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 80d150458c8..d8b63d164e4 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -298,10 +298,10 @@ recommended that you never use these unless you really know what the cache width is. 
int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *hwdev, dma_addr_t dma_addr) In some circumstances dma_map_single and dma_map_page will fail to create a mapping. A driver can check for these errors by testing the returned diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index dd294734260..69130f36590 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -280,7 +280,7 @@ unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, /* * Trying to unmap an invalid mapping */ - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(dev, dma_addr)) { dev_err(dev, "Trying to unmap invalid mapping\n"); return; } diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 1c44ec2a1d5..88b6e6f3fd8 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -186,9 +186,10 @@ hwsw_dma_supported (struct device *dev, u64 mask) } int -hwsw_dma_mapping_error (dma_addr_t dma_addr) +hwsw_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr); + return hwiommu_dma_mapping_error(dev, dma_addr) || + swiotlb_dma_mapping_error(dev, dma_addr); } EXPORT_SYMBOL(hwsw_dma_mapping_error); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 34421aed1e2..4956be40d7b 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2147,7 +2147,7 @@ sba_dma_supported (struct device *dev, u64 mask) } int -sba_dma_mapping_error (dma_addr_t dma_addr) +sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 52175af299a..53ebb648449 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -350,7 
+350,7 @@ void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL(sn_dma_sync_sg_for_device); -int sn_dma_mapping_error(dma_addr_t dma_addr) +int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index ae39dd88b9a..891312f8e5a 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -348,7 +348,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele EXPORT_SYMBOL(dma_sync_sg_for_device); -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 0e04f8fb152..3e7e0f1568e 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -281,7 +281,7 @@ static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data) dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dummy_page_da)) { + if (dma_mapping_error(bus->phb->parent, dummy_page_da)) { pr_err("PCIEX:Map dummy page failed.\n"); kfree(dummy_page_va); return -1; diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index 418b605ac35..5122ec14527 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -111,7 +111,7 @@ static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, dummy_page_da = dma_map_single(phb->parent, dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dummy_page_da)) { + if (dma_mapping_error(phb->parent, dummy_page_da)) { pr_err("SPIDER-IOWA:Map dummy page filed.\n"); kfree(dummy_page_va); return -1; diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 
1dc7295746d..731d7b15774 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -871,7 +871,7 @@ static int proc_mf_dump_cmdline(char *page, char **start, off_t off, count = 256 - off; dma_addr = iseries_hv_map(page, off + count, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_addr)) + if (dma_mapping_error(NULL, dma_addr)) return -ENOMEM; memset(page, 0, off + count); memset(&vsp_cmd, 0, sizeof(vsp_cmd)); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 19e7fc7c2c4..1eb86be93d7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -544,7 +544,7 @@ error: return ret; } -static const struct dma_mapping_ops calgary_dma_ops = { +static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551b..37544123896 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,7 +11,7 @@ static int forbid_dac __read_mostly; -const struct dma_mapping_ops *dma_ops; +struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -312,6 +312,8 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr) int dma_supported(struct device *dev, u64 mask) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { dev_info(dev, "PCI: Disallowing DAC for device\n"); @@ -319,8 +321,8 @@ int dma_supported(struct device *dev, u64 mask) } #endif - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); + if (ops->dma_supported) + return ops->dma_supported(dev, mask); /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. 
@@ -367,6 +369,7 @@ void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { + struct dma_mapping_ops *ops = get_dma_ops(dev); void *memory = NULL; struct page *page; unsigned long dma_mask = 0; @@ -435,8 +438,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Let low level make its own zone decisions */ gfp &= ~(GFP_DMA32|GFP_DMA); - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, dma_handle, gfp); return NULL; } @@ -448,14 +451,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } } - if (dma_ops->alloc_coherent) { + if (ops->alloc_coherent) { free_pages((unsigned long)memory, get_order(size)); gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + return ops->alloc_coherent(dev, size, dma_handle, gfp); } - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), + if (ops->map_simple) { + *dma_handle = ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) @@ -477,12 +480,14 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ if (dma_release_coherent(dev, order, vaddr)) return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, bus, size, 0); + if (ops->unmap_single) + ops->unmap_single(dev, bus, size, 0); free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d..744126e6495 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -692,8 +692,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) 
extern int agp_amd64_init(void); -static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, +static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 792b9179eff..3f91f71cdc3 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -/* Make sure we keep the same behaviour */ -static int nommu_mapping_error(dma_addr_t dma_addr) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - return (dma_addr == bad_dma_address); -#endif -} - - -const struct dma_mapping_ops nommu_dma_ops = { +struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, .map_sg = nommu_map_sg, - .mapping_error = nommu_mapping_error, .is_phys = 1, }; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 20df839b9c2..c4ce0332759 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } -const struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c index bcbe794a3ea..e14c03dc006 100644 --- a/drivers/firewire/fw-iso.c +++ b/drivers/firewire/fw-iso.c @@ -50,7 +50,7 @@ fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, address = dma_map_page(card->device, buffer->pages[i], 0, PAGE_SIZE, direction); - if (dma_mapping_error(address)) { + if (dma_mapping_error(card->device, address)) { __free_page(buffer->pages[i]); goto out_pages; } 
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 333b12544dd..566672e0bcf 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -953,7 +953,7 @@ at_context_queue_packet(struct context *ctx, struct fw_packet *packet) payload_bus = dma_map_single(ohci->card.device, packet->payload, packet->payload_length, DMA_TO_DEVICE); - if (dma_mapping_error(payload_bus)) { + if (dma_mapping_error(ohci->card.device, payload_bus)) { packet->ack = RCODE_SEND_ERROR; return -1; } diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 53fc5a641e6..aaff50ebba1 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -543,7 +543,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->response_bus = dma_map_single(device->card->device, &orb->response, sizeof(orb->response), DMA_FROM_DEVICE); - if (dma_mapping_error(orb->response_bus)) + if (dma_mapping_error(device->card->device, orb->response_bus)) goto fail_mapping_response; orb->request.response.high = 0; @@ -577,7 +577,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->base.request_bus = dma_map_single(device->card->device, &orb->request, sizeof(orb->request), DMA_TO_DEVICE); - if (dma_mapping_error(orb->base.request_bus)) + if (dma_mapping_error(device->card->device, orb->base.request_bus)) goto fail_mapping_request; sbp2_send_orb(&orb->base, lu, node_id, generation, @@ -1424,7 +1424,7 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device, orb->page_table_bus = dma_map_single(device->card->device, orb->page_table, sizeof(orb->page_table), DMA_TO_DEVICE); - if (dma_mapping_error(orb->page_table_bus)) + if (dma_mapping_error(device->card->device, orb->page_table_bus)) goto fail_page_table; /* @@ -1509,7 +1509,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) orb->base.request_bus = dma_map_single(device->card->device, &orb->request, 
sizeof(orb->request), DMA_TO_DEVICE); - if (dma_mapping_error(orb->base.request_bus)) + if (dma_mapping_error(device->card->device, orb->base.request_bus)) goto out; sbp2_send_orb(&orb->base, lu, lu->tgt->node_id, lu->generation, diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index eaba03273e4..284c9bca517 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -698,7 +698,7 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { + if (dma_mapping_error(&dd->pcidev->dev, addr)) { ret = -EIO; goto unlock; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 86e016916cd..82d9a0b5ca2 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_unmap; } @@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, pages[j], 0, flen, DMA_TO_DEVICE); unsigned long fofs = addr & ~PAGE_MASK; - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto done; } @@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, if (page) { dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_pbc; } diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 4e36aa7cb3d..cc6858f0b65 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ 
b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -780,7 +780,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) return -ENOMEM; dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { __free_page(dev->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/media/dvb/pluto2/pluto2.c b/drivers/media/dvb/pluto2/pluto2.c index 1360403b88b..a9653c63f4d 100644 --- a/drivers/media/dvb/pluto2/pluto2.c +++ b/drivers/media/dvb/pluto2/pluto2.c @@ -242,7 +242,7 @@ static int __devinit pluto_dma_map(struct pluto *pluto) pluto->dma_addr = pci_map_single(pluto->pdev, pluto->dma_buf, TS_DMA_BYTES, PCI_DMA_FROMDEVICE); - return pci_dma_mapping_error(pluto->dma_addr); + return pci_dma_mapping_error(pluto->pdev, pluto->dma_addr); } static void pluto_dma_unmap(struct pluto *pluto) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c3a5db72ddd..5f95e10229b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -337,7 +337,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, host->align_addr = dma_map_single(mmc_dev(host->mmc), host->align_buffer, 128 * 4, direction); - if (dma_mapping_error(host->align_addr)) + if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr)) goto fail; BUG_ON(host->align_addr & 0x3); @@ -439,7 +439,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, host->adma_addr = dma_map_single(mmc_dev(host->mmc), host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE); - if (dma_mapping_error(host->align_addr)) + if (dma_mapping_error(mmc_dev(host->mmc), host->adma_addr)) goto unmap_entries; BUG_ON(host->adma_addr & 0x3); diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c index 7a14980f347..18d3eeb7eab 100644 --- a/drivers/net/arm/ep93xx_eth.c +++ b/drivers/net/arm/ep93xx_eth.c @@ -482,7 +482,7 @@ static int
ep93xx_alloc_buffers(struct ep93xx_priv *ep) goto err; d = dma_map_single(NULL, page, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(d)) { + if (dma_mapping_error(NULL, d)) { free_page((unsigned long)page); goto err; } @@ -505,7 +505,7 @@ static int ep93xx_alloc_buffers(struct ep93xx_priv *ep) goto err; d = dma_map_single(NULL, page, PAGE_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(d)) { + if (dma_mapping_error(NULL, d)) { free_page((unsigned long)page); goto err; } diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 0263bef9cc6..c7cc760a177 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -1020,7 +1020,7 @@ static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp, mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(mapping))) { + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { __free_pages(page, PAGES_PER_SGE_SHIFT); return -ENOMEM; } @@ -1048,7 +1048,7 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp, mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(mapping))) { + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { dev_kfree_skb(skb); return -ENOMEM; } diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index a96331c875e..1b0861d73ab 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -386,7 +386,7 @@ static inline int add_one_rx_buf(void *va, unsigned int len, dma_addr_t mapping; mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(mapping))) + if (unlikely(pci_dma_mapping_error(pdev, mapping))) return -ENOMEM; pci_unmap_addr_set(sd, dma_addr, mapping); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 1037b133231..19d32a227be 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1790,7 +1790,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) 
rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data, RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(rx->dma_addr)) { + if (pci_dma_mapping_error(nic->pdev, rx->dma_addr)) { dev_kfree_skb_any(rx->skb); rx->skb = NULL; rx->dma_addr = 0; diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c index a14561f40db..9350564065e 100644 --- a/drivers/net/e1000e/ethtool.c +++ b/drivers/net/e1000e/ethtool.c @@ -1090,7 +1090,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) tx_ring->buffer_info[i].dma = pci_map_single(pdev, skb->data, skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(tx_ring->buffer_info[i].dma)) { + if (pci_dma_mapping_error(pdev, tx_ring->buffer_info[i].dma)) { ret_val = 4; goto err_nomem; } @@ -1153,7 +1153,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) rx_ring->buffer_info[i].dma = pci_map_single(pdev, skb->data, 2048, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rx_ring->buffer_info[i].dma)) { + if (pci_dma_mapping_error(pdev, rx_ring->buffer_info[i].dma)) { ret_val = 8; goto err_nomem; } diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 9c0f56b3c51..d1367789976 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -195,7 +195,7 @@ map_skb: buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; break; @@ -265,7 +265,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ps_page->page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(ps_page->dma)) { + if (pci_dma_mapping_error(pdev, ps_page->dma)) { dev_err(&adapter->pdev->dev, "RX DMA page map failed\n"); adapter->rx_dma_failed++; @@ -300,7 +300,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, 
buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_ps_bsize0, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; /* cleanup skb */ @@ -3344,7 +3344,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, skb->data + offset, size, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); adapter->tx_dma_failed++; return -1; @@ -3382,7 +3382,8 @@ static int e1000_tx_map(struct e1000_adapter *adapter, offset, size, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(adapter->pdev, + buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA page map failed\n"); adapter->tx_dma_failed++; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index e5a6e2e8454..91ec9fdc718 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -260,7 +260,7 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_addr)) + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto failure; pool->free_map[free_index] = IBM_VETH_INVALID_MAP; @@ -294,7 +294,7 @@ failure: pool->consumer_index = pool->size - 1; else pool->consumer_index--; - if (!dma_mapping_error(dma_addr)) + if (!dma_mapping_error(&adapter->vdev->dev, dma_addr)) dma_unmap_single(&adapter->vdev->dev, pool->dma_addr[index], pool->buff_size, DMA_FROM_DEVICE); @@ -448,11 +448,11 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) static void ibmveth_cleanup(struct ibmveth_adapter *adapter) { int i; + struct device *dev = &adapter->vdev->dev; if(adapter->buffer_list_addr != NULL) { -
if(!dma_mapping_error(adapter->buffer_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->buffer_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->buffer_list_dma)) { + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->buffer_list_dma = DMA_ERROR_CODE; } @@ -461,9 +461,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->filter_list_addr != NULL) { - if(!dma_mapping_error(adapter->filter_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->filter_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->filter_list_dma)) { + dma_unmap_single(dev, adapter->filter_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->filter_list_dma = DMA_ERROR_CODE; } @@ -472,8 +471,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->rx_queue.queue_addr != NULL) { - if(!dma_mapping_error(adapter->rx_queue.queue_dma)) { - dma_unmap_single(&adapter->vdev->dev, + if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) { + dma_unmap_single(dev, adapter->rx_queue.queue_dma, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); @@ -535,6 +534,7 @@ static int ibmveth_open(struct net_device *netdev) int rc; union ibmveth_buf_desc rxq_desc; int i; + struct device *dev; ibmveth_debug_printk("open starting\n"); @@ -563,17 +563,19 @@ static int ibmveth_open(struct net_device *netdev) return -ENOMEM; } - adapter->buffer_list_dma = dma_map_single(&adapter->vdev->dev, + dev = &adapter->vdev->dev; + + adapter->buffer_list_dma = dma_map_single(dev, adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->filter_list_dma = dma_map_single(&adapter->vdev->dev, + adapter->filter_list_dma = dma_map_single(dev, adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->rx_queue.queue_dma = dma_map_single(&adapter->vdev->dev, + adapter->rx_queue.queue_dma = dma_map_single(dev, adapter->rx_queue.queue_addr, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); - 
if((dma_mapping_error(adapter->buffer_list_dma) ) || - (dma_mapping_error(adapter->filter_list_dma)) || - (dma_mapping_error(adapter->rx_queue.queue_dma))) { + if ((dma_mapping_error(dev, adapter->buffer_list_dma)) || + (dma_mapping_error(dev, adapter->filter_list_dma)) || + (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) { ibmveth_error_printk("unable to map filter or buffer list pages\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -645,7 +647,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->bounce_buffer_dma = dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); - if (dma_mapping_error(adapter->bounce_buffer_dma)) { + if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) { ibmveth_error_printk("unable to map bounce buffer\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -922,7 +924,7 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) buf[1] = 0; } - if (dma_mapping_error(data_dma_addr)) { + if (dma_mapping_error(&adapter->vdev->dev, data_dma_addr)) { if (!firmware_has_feature(FW_FEATURE_CMO)) ibmveth_error_printk("tx: unable to map xmit buffer\n"); skb_copy_from_linear_data(skb, adapter->bounce_buffer, diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index b8d0639c1cd..c46864d626b 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1128,7 +1128,7 @@ static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp, msg->data.addr[0] = dma_map_single(port->dev, skb->data, skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(msg->data.addr[0])) + if (dma_mapping_error(port->dev, msg->data.addr[0])) goto recycle_and_drop; msg->dev = port->dev; @@ -1226,7 +1226,7 @@ static void veth_recycle_msg(struct veth_lpar_connection *cnx, dma_address = msg->data.addr[0]; dma_length = msg->data.len[0]; - if (!dma_mapping_error(dma_address)) + if (!dma_mapping_error(msg->dev, dma_address))
dma_unmap_single(msg->dev, dma_address, dma_length, DMA_TO_DEVICE); diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index ea3a09aaa84..7df928d3a3d 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -526,7 +526,7 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) return -ENOMEM; priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(priv->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) { __free_page(priv->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 993d87c9296..edc0fd58898 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -650,7 +650,7 @@ static void pasemi_mac_replenish_rx_ring(const struct net_device *dev, mac->bufsz - LOCAL_SKB_ALIGN, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(dma))) { + if (unlikely(pci_dma_mapping_error(mac->dma_pdev, dma))) { dev_kfree_skb_irq(info->skb); break; } @@ -1519,7 +1519,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb), PCI_DMA_TODEVICE); map_size[0] = skb_headlen(skb); - if (dma_mapping_error(map[0])) + if (pci_dma_mapping_error(mac->dma_pdev, map[0])) goto out_err_nolock; for (i = 0; i < nfrags; i++) { @@ -1529,7 +1529,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) frag->page_offset, frag->size, PCI_DMA_TODEVICE); map_size[i+1] = frag->size; - if (dma_mapping_error(map[i+1])) { + if (pci_dma_mapping_error(mac->dma_pdev, map[i+1])) { nfrags = i; goto out_err_nolock; } diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index e7d48a352be..e82b37bbd6c 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -328,7 +328,7 @@ static void ql_release_to_lrg_buf_free_list(struct ql3_adapter *qdev, qdev->lrg_buffer_len - QL_HEADER_SPACE, 
PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -1919,7 +1919,7 @@ static int ql_populate_free_queue(struct ql3_adapter *qdev) QL_HEADER_SPACE, PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -2454,7 +2454,7 @@ static int ql_send_map(struct ql3_adapter *qdev, */ map = pci_map_single(qdev->pdev, skb->data, len, PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -2487,7 +2487,7 @@ static int ql_send_map(struct ql3_adapter *qdev, sizeof(struct oal), PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping outbound address list with error: %d\n", @@ -2514,7 +2514,7 @@ static int ql_send_map(struct ql3_adapter *qdev, frag->page_offset, frag->size, PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping frags failed with error: %d\n", qdev->ndev->name, err); @@ -2916,7 +2916,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) QL_HEADER_SPACE, PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 9dae40ccf04..86d77d05190 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2512,8 +2512,8 @@ static void stop_nic(struct s2io_nic *nic) * Return Value: * SUCCESS on success or an appropriate -ve value on failure. 
*/ - -static int fill_rx_buffers(struct ring_info *ring, int from_card_up) +static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring, + int from_card_up) { struct sk_buff *skb; struct RxD_t *rxdp; @@ -2602,7 +2602,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) rxdp1->Buffer0_ptr = pci_map_single (ring->pdev, skb->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if(pci_dma_mapping_error(rxdp1->Buffer0_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp1->Buffer0_ptr)) goto pci_map_failed; rxdp->Control_2 = @@ -2636,7 +2637,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) rxdp3->Buffer0_ptr = pci_map_single(ring->pdev, ba->ba_0, BUF0_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer0_ptr)) goto pci_map_failed; } else pci_dma_sync_single_for_device(ring->pdev, @@ -2655,7 +2657,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) (ring->pdev, skb->data, ring->mtu + 4, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer2_ptr)) goto pci_map_failed; if (from_card_up) { @@ -2664,8 +2667,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error - (rxdp3->Buffer1_ptr)) { + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer1_ptr)) { pci_unmap_single (ring->pdev, (dma_addr_t)(unsigned long) @@ -2806,9 +2809,9 @@ static void free_rx_buffers(struct s2io_nic *sp) } } -static int s2io_chk_rx_buffers(struct ring_info *ring) +static int s2io_chk_rx_buffers(struct s2io_nic *nic, struct ring_info *ring) { - if (fill_rx_buffers(ring, 0) == -ENOMEM) { + if (fill_rx_buffers(nic, ring, 0) == -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name); DBG_PRINT(INFO_DBG, " in Rx Intr!!\n"); } @@ -2848,7 +2851,7 @@ static int s2io_poll_msix(struct napi_struct 
*napi, int budget) return 0; pkts_processed = rx_intr_handler(ring, budget); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(nic, ring); if (pkts_processed < budget_org) { netif_rx_complete(dev, napi); @@ -2882,7 +2885,7 @@ static int s2io_poll_inta(struct napi_struct *napi, int budget) for (i = 0; i < config->rx_ring_num; i++) { ring = &mac_control->rings[i]; ring_pkts_processed = rx_intr_handler(ring, budget); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(nic, ring); pkts_processed += ring_pkts_processed; budget -= ring_pkts_processed; if (budget <= 0) @@ -2939,7 +2942,8 @@ static void s2io_netpoll(struct net_device *dev) rx_intr_handler(&mac_control->rings[i], 0); for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(&mac_control->rings[i], 0) == -ENOMEM) { + if (fill_rx_buffers(nic, &mac_control->rings[i], 0) == + -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); DBG_PRINT(INFO_DBG, " in Rx Netpoll!!\n"); break; @@ -4235,14 +4239,14 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Buffer_Pointer = pci_map_single(sp->pdev, fifo->ufo_in_band_v, sizeof(u64), PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(txdp->Buffer_Pointer)) + if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) goto pci_map_failed; txdp++; } txdp->Buffer_Pointer = pci_map_single (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(txdp->Buffer_Pointer)) + if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) goto pci_map_failed; txdp->Host_Control = (unsigned long) skb; @@ -4345,7 +4349,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id) netif_rx_schedule(dev, &ring->napi); } else { rx_intr_handler(ring, 0); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(sp, ring); } return IRQ_HANDLED; @@ -4826,7 +4830,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id) */ if (!config->napi) { for (i = 0; i < config->rx_ring_num; i++) - s2io_chk_rx_buffers(&mac_control->rings[i]); + 
s2io_chk_rx_buffers(sp, &mac_control->rings[i]); } writeq(sp->general_int_mask, &bar0->general_int_mask); readl(&bar0->general_int_status); @@ -6859,7 +6863,7 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single( sp->pdev, (*skb)->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp1->Buffer0_ptr)) + if (pci_dma_mapping_error(sp->pdev, rxdp1->Buffer0_ptr)) goto memalloc_failed; rxdp->Host_Control = (unsigned long) (*skb); } @@ -6886,12 +6890,13 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single(sp->pdev, (*skb)->data, dev->mtu + 4, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) + if (pci_dma_mapping_error(sp->pdev, rxdp3->Buffer2_ptr)) goto memalloc_failed; rxdp3->Buffer0_ptr = *temp0 = pci_map_single( sp->pdev, ba->ba_0, BUF0_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) { + if (pci_dma_mapping_error(sp->pdev, + rxdp3->Buffer0_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer2_ptr, dev->mtu + 4, PCI_DMA_FROMDEVICE); @@ -6903,7 +6908,8 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, rxdp3->Buffer1_ptr = *temp1 = pci_map_single(sp->pdev, ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer1_ptr)) { + if (pci_dma_mapping_error(sp->pdev, + rxdp3->Buffer1_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer0_ptr, BUF0_LEN, PCI_DMA_FROMDEVICE); @@ -7187,7 +7193,7 @@ static int s2io_card_up(struct s2io_nic * sp) for (i = 0; i < config->rx_ring_num; i++) { mac_control->rings[i].mtu = dev->mtu; - ret = fill_rx_buffers(&mac_control->rings[i], 1); + ret = fill_rx_buffers(sp, &mac_control->rings[i], 1); if (ret) { DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", dev->name); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 601b001437c..0d27dd39bc0 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -233,7 +233,7 
@@ static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue, rx_buf->data, rx_buf->len, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(rx_buf->dma_addr))) { + if (unlikely(pci_dma_mapping_error(efx->pci_dev, rx_buf->dma_addr))) { dev_kfree_skb_any(rx_buf->skb); rx_buf->skb = NULL; return -EIO; @@ -275,7 +275,7 @@ static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, 0, efx_rx_buf_size(efx), PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(dma_addr))) { + if (unlikely(pci_dma_mapping_error(efx->pci_dev, dma_addr))) { __free_pages(rx_buf->page, efx->rx_buffer_order); rx_buf->page = NULL; return -EIO; diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index 5cdd082ab8f..5e8374ab28e 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -172,7 +172,7 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue, /* Process all fragments */ while (1) { - if (unlikely(pci_dma_mapping_error(dma_addr))) + if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr))) goto pci_err; /* Store fields for marking in the per-fragment final @@ -661,7 +661,8 @@ efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev, TSOH_BUFFER(tsoh), header_len, PCI_DMA_TODEVICE); - if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) { + if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev, + tsoh->dma_addr))) { kfree(tsoh); return NULL; } @@ -863,7 +864,7 @@ static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off, len, PCI_DMA_TODEVICE); - if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) { + if (likely(!pci_dma_mapping_error(efx->pci_dev, st->ifc.unmap_addr))) { st->ifc.unmap_len = len; st->ifc.len = len; st->ifc.dma_addr = st->ifc.unmap_addr; diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 00aa0b108cb..b6435d0d71f 100644 --- 
a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -452,7 +452,7 @@ spider_net_prepare_rx_descr(struct spider_net_card *card, /* iommu-map the skb */ buf = pci_map_single(card->pdev, descr->skb->data, SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buf)) { + if (pci_dma_mapping_error(card->pdev, buf)) { dev_kfree_skb_any(descr->skb); descr->skb = NULL; if (netif_msg_rx_err(card) && net_ratelimit()) @@ -691,7 +691,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card, unsigned long flags; buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buf)) { + if (pci_dma_mapping_error(card->pdev, buf)) { if (netif_msg_tx_err(card) && net_ratelimit()) dev_err(&card->netdev->dev, "could not iommu-map packet (%p, %i). " "Dropping packet\n", skb->data, skb->len); diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index a645e5028c1..8487ace9d2e 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -506,7 +506,7 @@ static void *alloc_rxbuf_page(struct pci_dev *hwdev, dma_addr_t *dma_handle) return NULL; *dma_handle = pci_map_single(hwdev, buf, PAGE_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(*dma_handle)) { + if (pci_dma_mapping_error(hwdev, *dma_handle)) { free_page((unsigned long)buf); return NULL; } @@ -536,7 +536,7 @@ static struct sk_buff *alloc_rxbuf_skb(struct net_device *dev, return NULL; *dma_handle = pci_map_single(hwdev, skb->data, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(*dma_handle)) { + if (pci_dma_mapping_error(hwdev, *dma_handle)) { dev_kfree_skb_any(skb); return NULL; } diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 217d506527a..d9769c52734 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -1166,7 +1166,7 @@ ath5k_rxbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) bf->skb = skb; bf->skbaddr = pci_map_single(sc->pdev, skb->data, 
sc->rxbufsize, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(bf->skbaddr))) { + if (unlikely(pci_dma_mapping_error(sc->pdev, bf->skbaddr))) { ATH5K_ERR(sc, "%s: DMA mapping failed\n", __func__); dev_kfree_skb(skb); bf->skb = NULL; @@ -1918,7 +1918,7 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) ATH5K_DBG(sc, ATH5K_DEBUG_BEACON, "skb %p [data %p len %u] " "skbaddr %llx\n", skb, skb->data, skb->len, (unsigned long long)bf->skbaddr); - if (pci_dma_mapping_error(bf->skbaddr)) { + if (pci_dma_mapping_error(sc->pdev, bf->skbaddr)) { ATH5K_ERR(sc, "beacon DMA mapping failed\n"); return -EIO; } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index c4a7c06793c..61f8fdea2d9 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3525,7 +3525,7 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost) crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(crq->msg_token)) + if (dma_mapping_error(dev, crq->msg_token)) goto map_failed; retrc = rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, @@ -3618,7 +3618,7 @@ static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost) async_q->size * sizeof(*async_q->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(async_q->msg_token)) { + if (dma_mapping_error(dev, async_q->msg_token)) { dev_err(dev, "Failed to map async queue\n"); goto free_async_crq; } diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 20000ec79b0..6b24b9cdb04 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -859,7 +859,7 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata) sizeof(hostdata->madapter_info), DMA_BIDIRECTIONAL); - if (dma_mapping_error(req->buffer)) { + if (dma_mapping_error(hostdata->dev, req->buffer)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(hostdata->dev, "Unable to map request_buffer for " @@ -1407,7 +1407,7 
@@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, length, DMA_BIDIRECTIONAL); - if (dma_mapping_error(host_config->buffer)) { + if (dma_mapping_error(hostdata->dev, host_config->buffer)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(hostdata->dev, "dma_mapping error getting host config\n"); diff --git a/drivers/scsi/ibmvscsi/ibmvstgt.c b/drivers/scsi/ibmvscsi/ibmvstgt.c index 3b9514c8f1f..2e13ec00172 100644 --- a/drivers/scsi/ibmvscsi/ibmvstgt.c +++ b/drivers/scsi/ibmvscsi/ibmvstgt.c @@ -564,7 +564,7 @@ static int crq_queue_create(struct crq_queue *queue, struct srp_target *target) queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(queue->msg_token)) + if (dma_mapping_error(target->dev, queue->msg_token)) goto map_failed; err = h_reg_crq(vport->dma_dev->unit_address, queue->msg_token, diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c index 182146100dc..462a8574dad 100644 --- a/drivers/scsi/ibmvscsi/rpa_vscsi.c +++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c @@ -253,7 +253,7 @@ static int rpavscsi_init_crq_queue(struct crq_queue *queue, queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(queue->msg_token)) + if (dma_mapping_error(hostdata->dev, queue->msg_token)) goto map_failed; gather_partition_info(); diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c index e81d59d7891..0c716566085 100644 --- a/drivers/spi/atmel_spi.c +++ b/drivers/spi/atmel_spi.c @@ -313,14 +313,14 @@ atmel_spi_dma_map_xfer(struct atmel_spi *as, struct spi_transfer *xfer) xfer->tx_dma = dma_map_single(dev, (void *) xfer->tx_buf, xfer->len, DMA_TO_DEVICE); - if (dma_mapping_error(xfer->tx_dma)) + if (dma_mapping_error(dev, xfer->tx_dma)) return -ENOMEM; } if (xfer->rx_buf) { xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len, DMA_FROM_DEVICE); - if (dma_mapping_error(xfer->rx_dma)) { + if (dma_mapping_error(dev, xfer->rx_dma)) { if (xfer->tx_buf) 
dma_unmap_single(dev, xfer->tx_dma, xfer->len, diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c index 9149689c79d..87b73e0169c 100644 --- a/drivers/spi/au1550_spi.c +++ b/drivers/spi/au1550_spi.c @@ -334,7 +334,7 @@ static int au1550_spi_dma_rxtmp_alloc(struct au1550_spi *hw, unsigned size) hw->dma_rx_tmpbuf_size = size; hw->dma_rx_tmpbuf_addr = dma_map_single(hw->dev, hw->dma_rx_tmpbuf, size, DMA_FROM_DEVICE); - if (dma_mapping_error(hw->dma_rx_tmpbuf_addr)) { + if (dma_mapping_error(hw->dev, hw->dma_rx_tmpbuf_addr)) { kfree(hw->dma_rx_tmpbuf); hw->dma_rx_tmpbuf = 0; hw->dma_rx_tmpbuf_size = 0; @@ -378,7 +378,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t) dma_rx_addr = dma_map_single(hw->dev, (void *)t->rx_buf, t->len, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_rx_addr)) + if (dma_mapping_error(hw->dev, dma_rx_addr)) dev_err(hw->dev, "rx dma map error\n"); } } else { @@ -401,7 +401,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t) dma_tx_addr = dma_map_single(hw->dev, (void *)t->tx_buf, t->len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_tx_addr)) + if (dma_mapping_error(hw->dev, dma_tx_addr)) dev_err(hw->dev, "tx dma map error\n"); } } else { diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c index b1cc148036c..f6f987bb71c 100644 --- a/drivers/spi/omap2_mcspi.c +++ b/drivers/spi/omap2_mcspi.c @@ -836,7 +836,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m) if (tx_buf != NULL) { t->tx_dma = dma_map_single(&spi->dev, (void *) tx_buf, len, DMA_TO_DEVICE); - if (dma_mapping_error(t->tx_dma)) { + if (dma_mapping_error(&spi->dev, t->tx_dma)) { dev_dbg(&spi->dev, "dma %cX %d bytes error\n", 'T', len); return -EINVAL; @@ -845,7 +845,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m) if (rx_buf != NULL) { t->rx_dma = dma_map_single(&spi->dev, rx_buf, t->len, DMA_FROM_DEVICE); - if 
(dma_mapping_error(t->rx_dma)) { + if (dma_mapping_error(&spi->dev, t->rx_dma)) { dev_dbg(&spi->dev, "dma %cX %d bytes error\n", 'R', len); if (tx_buf != NULL) diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 0c452c46ab0..067299d6d19 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -353,7 +353,7 @@ static int map_dma_buffers(struct driver_data *drv_data) drv_data->rx_dma = dma_map_single(dev, drv_data->rx, drv_data->rx_map_len, DMA_FROM_DEVICE); - if (dma_mapping_error(drv_data->rx_dma)) + if (dma_mapping_error(dev, drv_data->rx_dma)) return 0; /* Stream map the tx buffer */ @@ -361,7 +361,7 @@ static int map_dma_buffers(struct driver_data *drv_data) drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) { + if (dma_mapping_error(dev, drv_data->tx_dma)) { dma_unmap_single(dev, drv_data->rx_dma, drv_data->rx_map_len, DMA_FROM_DEVICE); return 0; diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c index 54ac7bea5f8..6fb77fcc497 100644 --- a/drivers/spi/spi_imx.c +++ b/drivers/spi/spi_imx.c @@ -491,7 +491,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) + if (dma_mapping_error(dev, drv_data->tx_dma)) return -1; drv_data->tx_dma_needs_unmap = 1; @@ -516,7 +516,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->len, DMA_FROM_DEVICE); - if (dma_mapping_error(drv_data->rx_dma)) + if (dma_mapping_error(dev, drv_data->rx_dma)) return -1; drv_data->rx_dma_needs_unmap = 1; } @@ -534,7 +534,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) { + if (dma_mapping_error(dev, drv_data->tx_dma)) { if (drv_data->rx_dma) { dma_unmap_single(dev, drv_data->rx_dma, diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h index db351d1296f..a5801ae02e4 100644 --- 
a/include/asm-alpha/dma-mapping.h +++ b/include/asm-alpha/dma-mapping.h @@ -24,8 +24,8 @@ pci_unmap_sg(alpha_gendev_to_pci(dev), sg, nents, dir) #define dma_supported(dev, mask) \ pci_dma_supported(alpha_gendev_to_pci(dev), mask) -#define dma_mapping_error(addr) \ - pci_dma_mapping_error(addr) +#define dma_mapping_error(dev, addr) \ + pci_dma_mapping_error(alpha_gendev_to_pci(dev), addr) #else /* no PCI - no IOMMU. */ @@ -45,7 +45,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, #define dma_unmap_page(dev, addr, size, dir) ((void)0) #define dma_unmap_sg(dev, sg, nents, dir) ((void)0) -#define dma_mapping_error(addr) (0) +#define dma_mapping_error(dev, addr) (0) #endif /* !CONFIG_PCI */ diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h index d31fd49ff79..2a14302c17a 100644 --- a/include/asm-alpha/pci.h +++ b/include/asm-alpha/pci.h @@ -106,7 +106,7 @@ extern dma_addr_t pci_map_page(struct pci_dev *, struct page *, /* Test for pci_map_single or pci_map_page having generated an error. */ static inline int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) { return dma_addr == 0; } diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index e99406a7bec..f41335ba633 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -56,7 +56,7 @@ static inline int dma_is_consistent(struct device *dev, dma_addr_t handle) /* * DMA errors are defined by all-bits-set in the DMA address. 
*/ -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == ~0; } diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h index 57dc672bab8..0399359ab5d 100644 --- a/include/asm-avr32/dma-mapping.h +++ b/include/asm-avr32/dma-mapping.h @@ -35,7 +35,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) /* * dma_map_single can't fail as it is implemented now. */ -static inline int dma_mapping_error(dma_addr_t addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t addr) { return 0; } diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index edc8d1bfaae..cb2fb25ff8d 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -120,7 +120,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index 2e8966ca030..b2898877c07 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -126,7 +126,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele } static inline -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-generic/dma-mapping-broken.h b/include/asm-generic/dma-mapping-broken.h index e2468f894d2..82cd0cb1c3f 100644 --- a/include/asm-generic/dma-mapping-broken.h +++ b/include/asm-generic/dma-mapping-broken.h @@ -61,7 +61,7 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, #define dma_sync_sg_for_device dma_sync_sg_for_cpu extern int -dma_mapping_error(dma_addr_t dma_addr); +dma_mapping_error(struct device *dev, dma_addr_t dma_addr); extern int 
dma_supported(struct device *dev, u64 mask); diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 783ab9944d7..189486c3f92 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -144,9 +144,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return pci_dma_mapping_error(dma_addr); + return pci_dma_mapping_error(to_pci_dev(dev), dma_addr); } diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h index 25c10e96b2b..37b3706226e 100644 --- a/include/asm-generic/pci-dma-compat.h +++ b/include/asm-generic/pci-dma-compat.h @@ -99,9 +99,9 @@ pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, } static inline int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) { - return dma_mapping_error(dma_addr); + return dma_mapping_error(&pdev->dev, dma_addr); } #endif diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index 0721a5e8271..a6d50c77b6b 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -54,7 +54,7 @@ typedef void ia64_mv_dma_sync_single_for_cpu (struct device *, dma_addr_t, size_ typedef void ia64_mv_dma_sync_sg_for_cpu (struct device *, struct scatterlist *, int, int); typedef void ia64_mv_dma_sync_single_for_device (struct device *, dma_addr_t, size_t, int); typedef void ia64_mv_dma_sync_sg_for_device (struct device *, struct scatterlist *, int, int); -typedef int ia64_mv_dma_mapping_error (dma_addr_t dma_addr); +typedef int ia64_mv_dma_mapping_error(struct device *, dma_addr_t dma_addr); typedef int ia64_mv_dma_supported (struct device *, u64); typedef dma_addr_t ia64_mv_dma_map_single_attrs (struct device *, void *, size_t, int, struct dma_attrs *); diff --git 
a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h index a26cdeb46a5..91f7944333d 100644 --- a/include/asm-m68k/dma-mapping.h +++ b/include/asm-m68k/dma-mapping.h @@ -84,7 +84,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *s { } -static inline int dma_mapping_error(dma_addr_t handle) +static inline int dma_mapping_error(struct device *dev, dma_addr_t handle) { return 0; } diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h index 230b3f1b69b..c64afb40cd0 100644 --- a/include/asm-mips/dma-mapping.h +++ b/include/asm-mips/dma-mapping.h @@ -42,7 +42,7 @@ extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); -extern int dma_mapping_error(dma_addr_t dma_addr); +extern int dma_mapping_error(struct device *dev, dma_addr_t dma_addr); extern int dma_supported(struct device *dev, u64 mask); static inline int diff --git a/include/asm-mn10300/dma-mapping.h b/include/asm-mn10300/dma-mapping.h index 7c882fca9ec..ccae8f6c632 100644 --- a/include/asm-mn10300/dma-mapping.h +++ b/include/asm-mn10300/dma-mapping.h @@ -182,7 +182,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } static inline -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h index c6c0e9ff6bd..53af696f23d 100644 --- a/include/asm-parisc/dma-mapping.h +++ b/include/asm-parisc/dma-mapping.h @@ -248,6 +248,6 @@ void * sba_get_iommu(struct parisc_device *dev); #endif /* At the moment, we panic on error for IOMMU resource exaustion */ -#define dma_mapping_error(x) 0 +#define dma_mapping_error(dev, x) 0 #endif diff --git a/include/asm-powerpc/dma-mapping.h 
b/include/asm-powerpc/dma-mapping.h index 74c54978098..c7ca45f97dd 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -415,7 +415,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { #ifdef CONFIG_PPC64 return (dma_addr == DMA_ERROR_CODE); diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h index 22cc419389f..6c0b8a2de14 100644 --- a/include/asm-sh/dma-mapping.h +++ b/include/asm-sh/dma-mapping.h @@ -171,7 +171,7 @@ static inline int dma_get_cache_alignment(void) return L1_CACHE_BYTES; } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == 0; } diff --git a/include/asm-sparc/dma-mapping_64.h b/include/asm-sparc/dma-mapping_64.h index 38cbec76a33..bfa64f9702d 100644 --- a/include/asm-sparc/dma-mapping_64.h +++ b/include/asm-sparc/dma-mapping_64.h @@ -135,7 +135,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, /* No flushing needed to sync cpu writes to the device. 
*/ } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return (dma_addr == DMA_ERROR_CODE); } diff --git a/include/asm-sparc/pci_32.h b/include/asm-sparc/pci_32.h index b93b6c79e08..0ee949d220c 100644 --- a/include/asm-sparc/pci_32.h +++ b/include/asm-sparc/pci_32.h @@ -154,7 +154,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, #define PCI_DMA_ERROR_CODE (~(dma_addr_t)0x0) -static inline int pci_dma_mapping_error(dma_addr_t dma_addr) +static inline int pci_dma_mapping_error(struct pci_dev *pdev, + dma_addr_t dma_addr) { return (dma_addr == PCI_DMA_ERROR_CODE); } diff --git a/include/asm-sparc/pci_64.h b/include/asm-sparc/pci_64.h index f59f2571295..4f79a54948f 100644 --- a/include/asm-sparc/pci_64.h +++ b/include/asm-sparc/pci_64.h @@ -140,9 +140,10 @@ extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); #define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) #define PCI64_ADDR_BASE 0xfffc000000000000UL -static inline int pci_dma_mapping_error(dma_addr_t dma_addr) +static inline int pci_dma_mapping_error(struct pci_dev *pdev, + dma_addr_t dma_addr) { - return dma_mapping_error(dma_addr); + return dma_mapping_error(&pdev->dev, dma_addr); } #ifdef CONFIG_PCI diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index 87a715367a1..3c034f48fdb 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h @@ -5,6 +5,9 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif +#ifdef CONFIG_X86_64 +struct dma_mapping_ops *dma_ops; +#endif #ifdef CONFIG_DMAR void *iommu; /* hook for IOMMU specific extension */ #endif diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index c2ddd3d1b88..0eaa9bf6011 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -17,7 +17,8 @@ extern int panic_on_overflow; extern int force_iommu; struct dma_mapping_ops { - int (*mapping_error)(dma_addr_t 
dma_addr); + int (*mapping_error)(struct device *dev, + dma_addr_t dma_addr); void* (*alloc_coherent)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); void (*free_coherent)(struct device *dev, size_t size, @@ -56,14 +57,32 @@ struct dma_mapping_ops { int is_phys; }; -extern const struct dma_mapping_ops *dma_ops; +extern struct dma_mapping_ops *dma_ops; -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) { - if (dma_ops->mapping_error) - return dma_ops->mapping_error(dma_addr); +#ifdef CONFIG_X86_32 + return dma_ops; +#else + if (unlikely(!dev) || !dev->archdata.dma_ops) + return dma_ops; + else + return dev->archdata.dma_ops; +#endif +} + +/* Make sure we keep the same behaviour */ +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + struct dma_mapping_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); return (dma_addr == bad_dma_address); +#endif } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) @@ -83,44 +102,53 @@ static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction); + return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); } static inline void dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, addr, size, direction); + if (ops->unmap_single) + ops->unmap_single(dev, addr, size, direction); } static inline int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops 
*ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_sg(hwdev, sg, nents, direction); + return ops->map_sg(hwdev, sg, nents, direction); } static inline void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_sg) - dma_ops->unmap_sg(hwdev, sg, nents, direction); + if (ops->unmap_sg) + ops->unmap_sg(hwdev, sg, nents, direction); } static inline void dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_cpu) - dma_ops->sync_single_for_cpu(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -128,10 +156,11 @@ static inline void dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_device) - dma_ops->sync_single_for_device(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_device) + ops->sync_single_for_device(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -139,11 +168,12 @@ static inline void dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_cpu) - dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, - size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_cpu) + ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, + size, direction); 
flush_write_buffers(); } @@ -152,11 +182,12 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_device) - dma_ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_device) + ops->sync_single_range_for_device(hwdev, dma_handle, + offset, size, direction); flush_write_buffers(); } @@ -164,9 +195,11 @@ static inline void dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_cpu) - dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_cpu) + ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -174,9 +207,11 @@ static inline void dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_device) - dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_device) + ops->sync_sg_for_device(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -185,9 +220,11 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(dev, page_to_phys(page)+offset, - size, direction); + return ops->map_single(dev, page_to_phys(page) + offset, + size, direction); } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, diff --git a/include/asm-x86/swiotlb.h 
b/include/asm-x86/swiotlb.h index c706a744263..2730b351afc 100644 --- a/include/asm-x86/swiotlb.h +++ b/include/asm-x86/swiotlb.h @@ -35,7 +35,7 @@ extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); -extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h index 3c7d537dd15..51882ae3db4 100644 --- a/include/asm-xtensa/dma-mapping.h +++ b/include/asm-xtensa/dma-mapping.h @@ -139,7 +139,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, consistent_sync(sg_virt(sg), sg->length, dir); } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 7d51cbca49a..75ae6d8aba4 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -758,7 +758,7 @@ static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr, } dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction); - if (!dma_mapping_error(dma_addr)) { + if (!dma_mapping_error(&c->pdev->dev, dma_addr)) { #ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 if ((sizeof(dma_addr_t) > 4) && c->pae_support) { *mptr++ = cpu_to_le32(0x7C020002); diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 4bf8cade9db..e530026eedf 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -427,9 +427,9 @@ static inline int ssb_dma_mapping_error(struct ssb_device *dev, dma_addr_t addr) { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: - return 
pci_dma_mapping_error(addr); + return pci_dma_mapping_error(dev->bus->host_pci, addr); case SSB_BUSTYPE_SSB: - return dma_mapping_error(addr); + return dma_mapping_error(dev->dev, addr); default: __ssb_dma_not_implemented(dev); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 90b529f7a15..936e333e7ce 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1590,7 +1590,7 @@ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { if (dev->dma_ops) return dev->dma_ops->mapping_error(dev, dma_addr); - return dma_mapping_error(dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d568894df8c..977edbdbc1d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -492,7 +492,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ dma_addr_t handle; handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE); - if (swiotlb_dma_mapping_error(handle)) + if (swiotlb_dma_mapping_error(hwdev, handle)) return NULL; ret = bus_to_virt(handle); @@ -824,7 +824,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, } int -swiotlb_dma_mapping_error(dma_addr_t dma_addr) +swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a19b22b452a..84d328329d9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -169,7 +169,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, (void *) vec->sge[xdr_sge_no].iov_base + sge_off, sge_bytes, DMA_TO_DEVICE); - if (dma_mapping_error(sge[sge_no].addr)) + if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, + sge[sge_no].addr)) goto err; sge_off = 0; sge_no++; -- cgit v1.2.3-70-g09d2 From 51cc50685a4275c6a02653670af9f108a64e01cf Mon Sep 17 00:00:00 2001 From: 
Alexey Dobriyan Date: Fri, 25 Jul 2008 19:45:34 -0700 Subject: SL*B: drop kmem cache argument from constructor Kmem cache passed to constructor is only needed for constructors that are themselves multiplexeres. Nobody uses this "feature", nor does anybody uses passed kmem cache in non-trivial way, so pass only pointer to object. Non-trivial places are: arch/powerpc/mm/init_64.c arch/powerpc/mm/hugetlbpage.c This is flag day, yes. Signed-off-by: Alexey Dobriyan Acked-by: Pekka Enberg Acked-by: Christoph Lameter Cc: Jon Tollefson Cc: Nick Piggin Cc: Matt Mackall [akpm@linux-foundation.org: fix arch/powerpc/mm/hugetlbpage.c] [akpm@linux-foundation.org: fix mm/slab.c] [akpm@linux-foundation.org: fix ubifs] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/plat-s3c24xx/dma.c | 2 +- arch/powerpc/kernel/rtas_flash.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 9 ++------- arch/powerpc/mm/init_64.c | 24 +++++++++--------------- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/sh/mm/pmb.c | 2 +- arch/xtensa/mm/init.c | 2 +- drivers/usb/mon/mon_text.c | 4 ++-- fs/adfs/super.c | 2 +- fs/affs/super.c | 2 +- fs/afs/super.c | 4 ++-- fs/befs/linuxvfs.c | 2 +- fs/bfs/inode.c | 2 +- fs/block_dev.c | 2 +- fs/buffer.c | 2 +- fs/cifs/cifsfs.c | 2 +- fs/coda/inode.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/efs/super.c | 2 +- fs/ext2/super.c | 2 +- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/fat/cache.c | 2 +- fs/fat/inode.c | 2 +- fs/fuse/inode.c | 2 +- fs/gfs2/main.c | 4 ++-- fs/hfs/super.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hpfs/super.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 2 +- fs/isofs/inode.c | 2 +- fs/jffs2/super.c | 2 +- fs/jfs/jfs_metapage.c | 2 +- fs/jfs/super.c | 2 +- fs/locks.c | 2 +- fs/minix/inode.c | 2 +- fs/ncpfs/inode.c | 2 +- fs/nfs/inode.c | 2 +- fs/ntfs/super.c | 2 +- fs/ocfs2/dlm/dlmfs.c | 3 +-- fs/ocfs2/super.c | 2 +- fs/openpromfs/inode.c | 2 +- fs/proc/inode.c | 2 +- fs/qnx4/inode.c | 2 +- fs/reiserfs/super.c | 2 +- 
fs/romfs/inode.c | 2 +- fs/smbfs/inode.c | 2 +- fs/sysv/inode.c | 2 +- fs/ubifs/super.c | 2 +- fs/udf/super.c | 2 +- fs/ufs/super.c | 2 +- fs/xfs/linux-2.6/kmem.h | 2 +- fs/xfs/linux-2.6/xfs_super.c | 1 - include/linux/slab.h | 2 +- include/linux/slub_def.h | 2 +- ipc/mqueue.c | 2 +- kernel/fork.c | 2 +- lib/idr.c | 2 +- lib/radix-tree.c | 2 +- mm/rmap.c | 2 +- mm/shmem.c | 2 +- mm/slab.c | 11 +++++------ mm/slob.c | 7 +++---- mm/slub.c | 13 ++++++------- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 67 files changed, 90 insertions(+), 106 deletions(-) (limited to 'net') diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 60f162dc4fa..8c5e656d5d8 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -1304,7 +1304,7 @@ struct sysdev_class dma_sysclass = { /* kmem cache implementation */ -static void s3c2410_dma_cache_ctor(struct kmem_cache *c, void *p) +static void s3c2410_dma_cache_ctor(void *p) { memset(p, 0, sizeof(struct s3c2410_dma_buf)); } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 09ded5c424a..149cb112cd1 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf, } /* constructor for flash_block_cache */ -void rtas_block_ctor(struct kmem_cache *cache, void *ptr) +void rtas_block_ctor(void *ptr) { memset(ptr, 0, RTAS_BLK_SIZE); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fb42c4dd321..ed0aab0208a 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -113,7 +113,7 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, unsigned long address, unsigned int psize) { - pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize), + pte_t *new = kmem_cache_zalloc(huge_pgtable_cache(psize), GFP_KERNEL|__GFP_REPEAT); if 
(! new) @@ -730,11 +730,6 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -static void zero_ctor(struct kmem_cache *cache, void *addr) -{ - memset(addr, 0, kmem_cache_size(cache)); -} - static int __init hugetlbpage_init(void) { unsigned int psize; @@ -756,7 +751,7 @@ static int __init hugetlbpage_init(void) HUGEPTE_TABLE_SIZE(psize), HUGEPTE_TABLE_SIZE(psize), 0, - zero_ctor); + NULL); if (!huge_pgtable_cache(psize)) panic("hugetlbpage_init(): could not create %s"\ "\n", HUGEPTE_CACHE_NAME(psize)); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a41bc5aa204..4f7df85129d 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -136,9 +136,14 @@ static int __init setup_kcore(void) module_init(setup_kcore); #endif -static void zero_ctor(struct kmem_cache *cache, void *addr) +static void pgd_ctor(void *addr) { - memset(addr, 0, kmem_cache_size(cache)); + memset(addr, 0, PGD_TABLE_SIZE); +} + +static void pmd_ctor(void *addr) +{ + memset(addr, 0, PMD_TABLE_SIZE); } static const unsigned int pgtable_cache_size[2] = { @@ -163,19 +168,8 @@ struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; void pgtable_cache_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { - int size = pgtable_cache_size[i]; - const char *name = pgtable_cache_name[i]; - - pr_debug("Allocating page table cache %s (#%d) " - "for size: %08x...\n", name, i, size); - pgtable_cache[i] = kmem_cache_create(name, - size, size, - SLAB_PANIC, - zero_ctor); - } + pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor); + pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor); } #ifdef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7123472801d..690ca7b0dcf 100644 --- 
a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -78,7 +78,7 @@ spufs_destroy_inode(struct inode *inode) } static void -spufs_init_once(struct kmem_cache *cachep, void *p) +spufs_init_once(void *p) { struct spufs_inode_info *ei = p; diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index 0b0ec6e0475..46911bcbf17 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -293,7 +293,7 @@ void pmb_unmap(unsigned long addr) } while (pmbe); } -static void pmb_cache_ctor(struct kmem_cache *cachep, void *pmb) +static void pmb_cache_ctor(void *pmb) { struct pmb_entry *pmbe = pmb; diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 81d0560eaea..ee261005b36 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -309,7 +309,7 @@ void show_mem(void) struct kmem_cache *pgtable_cache __read_mostly; -static void pgd_ctor(struct kmem_cache *cache, void* addr) +static void pgd_ctor(void* addr) { pte_t* ptep = (pte_t*)addr; int i; diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index 5e3e4e9b6c7..1f715436d6d 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -87,7 +87,7 @@ struct mon_reader_text { static struct dentry *mon_dir; /* Usually /sys/kernel/debug/usbmon */ -static void mon_text_ctor(struct kmem_cache *, void *); +static void mon_text_ctor(void *); struct mon_text_ptr { int cnt, limit; @@ -720,7 +720,7 @@ void mon_text_del(struct mon_bus *mbus) /* * Slab interface: constructor. */ -static void mon_text_ctor(struct kmem_cache *slab, void *mem) +static void mon_text_ctor(void *mem) { /* * Nothing to initialize. No, really! 
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 9e421eeb672..26f3b43726b 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode) kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; diff --git a/fs/affs/super.c b/fs/affs/super.c index 4e030956640..3a89094f93d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode) kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; diff --git a/fs/afs/super.c b/fs/afs/super.c index 7e3faeef681..250d8c4d66e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -27,7 +27,7 @@ #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ -static void afs_i_init_once(struct kmem_cache *cachep, void *foo); +static void afs_i_init_once(void *foo); static int afs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); @@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb) /* * initialise an inode cache slab element prior to any use */ -static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode) +static void afs_i_init_once(void *_vnode) { struct afs_vnode *vnode = _vnode; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e8717de3bab..02c6e62b72f 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode) kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 
053e690ec9e..0ed57b5ee01 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -264,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode) kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct bfs_inode_info *bi = foo; diff --git a/fs/block_dev.c b/fs/block_dev.c index 10d8a0aa871..dcf37cada36 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; diff --git a/fs/buffer.c b/fs/buffer.c index 109b261192d..5fd497cdd6f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3272,7 +3272,7 @@ int bh_submit_read(struct buffer_head *bh) EXPORT_SYMBOL(bh_submit_read); static void -init_buffer_head(struct kmem_cache *cachep, void *data) +init_buffer_head(void *data) { struct buffer_head *bh = data; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 22857c639df..fe5f6809cba 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -766,7 +766,7 @@ const struct file_operations cifs_dir_ops = { }; static void -cifs_init_once(struct kmem_cache *cachep, void *inode) +cifs_init_once(void *inode) { struct cifsInodeInfo *cifsi = inode; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 2f58dfc7008..830f51abb97 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode) kmem_cache_free(coda_inode_cachep, ITOC(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 6f403cfba14..448dfd597b5 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -578,7 +578,7 @@ 
static struct file_system_type ecryptfs_fs_type = { * Initializes the ecryptfs_inode_info_cache when it is created */ static void -inode_info_init_once(struct kmem_cache *cachep, void *vptr) +inode_info_init_once(void *vptr) { struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; @@ -589,7 +589,7 @@ static struct ecryptfs_cache_info { struct kmem_cache **cache; const char *name; size_t size; - void (*ctor)(struct kmem_cache *cache, void *obj); + void (*ctor)(void *obj); } ecryptfs_cache_infos[] = { { .cache = &ecryptfs_auth_tok_list_item_cache, diff --git a/fs/efs/super.c b/fs/efs/super.c index d733531b55e..567b134fa1f 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode) kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 31308a3b0b8..fd88c7b43e6 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -159,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode) kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 615788c6843..8ddced38467 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode) kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1cb371dcd60..b5479b1dff1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -595,7 +595,7 @@ static void ext4_destroy_inode(struct inode *inode) 
kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3a9ecac8d61..3222f51c41c 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode) static struct kmem_cache *fat_cache_cachep; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct fat_cache *cache = (struct fat_cache *)foo; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 23676f9d79c..6d266d793e2 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -498,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode) kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7d2f7d6e22e..d2249f174e2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -956,7 +956,7 @@ static inline void unregister_fuseblk(void) } #endif -static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) +static void fuse_inode_init_once(void *foo) { struct inode * inode = foo; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index bcc668d0fad..bb2cc303ac2 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -24,7 +24,7 @@ #include "util.h" #include "glock.h" -static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) ip->i_alloc = NULL; } -static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 
ac2ec5ef66e..4abb1047c68 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfs_init_once(struct kmem_cache *cachep, void *p) +static void hfs_init_once(void *p) { struct hfs_inode_info *i = p; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3859118531c..e834e578c93 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfsplus_init_once(struct kmem_cache *cachep, void *p) +static void hfsplus_init_once(void *p) { struct hfsplus_inode_info *i = p; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index f63a699ec65..b8ae9c90ada 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode) kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dbd01d262ca..3f58923fb39 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -705,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = { }; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; diff --git a/fs/inode.c b/fs/inode.c index 35b6414522e..b6726f64453 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -224,7 +224,7 @@ void inode_init_once(struct inode *inode) EXPORT_SYMBOL(inode_init_once); -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct inode * inode = (struct inode *) foo; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 044a254d526..26948a6033b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -73,7 +73,7 
@@ static void isofs_destroy_inode(struct inode *inode) kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct iso_inode_info *ei = foo; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 7da69eae49e..efd401257ed 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode) kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } -static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) +static void jffs2_i_init_once(void *foo) { struct jffs2_inode_info *f = foo; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 854ff0ec574..c350057087d 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct metapage *mp = (struct metapage *)foo; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 359c091d896..3630718be39 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -760,7 +760,7 @@ static struct file_system_type jfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; diff --git a/fs/locks.c b/fs/locks.c index 01490300f7c..5eb259e3cd3 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock); * Initialises the fields of the file lock which are invariant for * free file_locks. 
*/ -static void init_once(struct kmem_cache *cache, void *foo) +static void init_once(void *foo) { struct file_lock *lock = (struct file_lock *) foo; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 523d7371341..d1d1eb84679 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode) kmem_cache_free(minix_inode_cachep, minix_i(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 2e5ab1204de..d642f0e5b36 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode) kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index df23f987da6..52daefa2f52 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) #endif } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct nfs_inode *nfsi = (struct nfs_inode *) foo; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 3e76f3b216b..4a46743b507 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache; struct kmem_cache *ntfs_big_inode_cache; /* Init once constructor for the inode slab cache. 
*/ -static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo) +static void ntfs_big_inode_init_once(void *foo) { ntfs_inode *ni = (ntfs_inode *)foo; diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index e48aba698b7..533a789c3ef 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp, return writelen; } -static void dlmfs_init_once(struct kmem_cache *cachep, - void *foo) +static void dlmfs_init_once(void *foo) { struct dlmfs_inode_private *ip = (struct dlmfs_inode_private *) foo; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ccecfe5094f..2560b33889a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1118,7 +1118,7 @@ bail: return status; } -static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) +static void ocfs2_inode_init_once(void *data) { struct ocfs2_inode_info *oi = data; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index d17b4fd204e..9f5b054f06b 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = { .kill_sb = kill_anon_super, }; -static void op_inode_init_once(struct kmem_cache * cachep, void *data) +static void op_inode_init_once(void *data) { struct op_inode_info *oi = (struct op_inode_info *) data; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 02eca2ed9dd..b37f25dc45a 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -94,7 +94,7 @@ static void proc_destroy_inode(struct inode *inode) kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index b31ab78052b..2aad1044b84 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode) kmem_cache_free(qnx4_inode_cachep, 
qnx4_i(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2ec748ba0bd..879e54d35c2 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -521,7 +521,7 @@ static void reiserfs_destroy_inode(struct inode *inode) kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 3f13d491c7c..8e51a2aaa97 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -577,7 +577,7 @@ static void romfs_destroy_inode(struct inode *inode) kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct romfs_inode_info *ei = foo; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 376ef3ee6ed..3528f40ffb0 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode) kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index c5d60de0658..df0d435baa4 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode) kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *p) +static void init_once(void *p) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 00eb9c68ad0..ca1e2d4e03c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1841,7 +1841,7 @@ static struct 
file_system_type ubifs_fs_type = { /* * Inode slab cache constructor. */ -static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) +static void inode_slab_ctor(void *obj) { struct ubifs_inode *ui = obj; inode_init_once(&ui->vfs_inode); diff --git a/fs/udf/super.c b/fs/udf/super.c index 44cc702f96c..5698bbf83bb 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode) kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct udf_inode_info *ei = (struct udf_inode_info *)foo; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 227c9d70004..3e30e40aa24 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1302,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode) kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 5e956490297..a20683cf74d 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name) static inline kmem_zone_t * kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, - void (*construct)(kmem_zone_t *, void *)) + void (*construct)(void *)) { return kmem_cache_create(zone_name, size, 0, flags, construct); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 742b2c7852c..943381284e2 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -843,7 +843,6 @@ xfs_fs_destroy_inode( STATIC void xfs_fs_inode_init_once( - kmem_zone_t *zonep, void *vnode) { inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); diff --git a/include/linux/slab.h b/include/linux/slab.h index 41103910f8a..9ff8e849940 100644 --- a/include/linux/slab.h +++ 
b/include/linux/slab.h @@ -58,7 +58,7 @@ int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, - void (*)(struct kmem_cache *, void *)); + void (*)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d117ea2825a..5bad61a93f6 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -85,7 +85,7 @@ struct kmem_cache { struct kmem_cache_order_objects min; gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ - void (*ctor)(struct kmem_cache *, void *); + void (*ctor)(void *); int inuse; /* Offset to metadata */ int align; /* Alignment */ const char *name; /* Name (only for display!) */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 1fdc2eb2f6d..474984f9e03 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -207,7 +207,7 @@ static int mqueue_get_sb(struct file_system_type *fs_type, return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; diff --git a/kernel/fork.c b/kernel/fork.c index b99d73e971a..80e83e459b1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1442,7 +1442,7 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -static void sighand_ctor(struct kmem_cache *cachep, void *data) +static void sighand_ctor(void *data) { struct sighand_struct *sighand = data; diff --git a/lib/idr.c b/lib/idr.c index 3476f8203e9..e728c7fccc4 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -607,7 +607,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } EXPORT_SYMBOL(idr_replace); -static void idr_cache_ctor(struct kmem_cache *idr_layer_cache, void *idr_layer) +static void 
idr_cache_ctor(void *idr_layer) { memset(idr_layer, 0, sizeof(struct idr_layer)); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9c4f1ffa286..be86b32bc87 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1183,7 +1183,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(struct kmem_cache *cachep, void *node) +radix_tree_node_ctor(void *node) { memset(node, 0, sizeof(struct radix_tree_node)); } diff --git a/mm/rmap.c b/mm/rmap.c index abbd29f7c43..39ae5a9bf38 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -138,7 +138,7 @@ void anon_vma_unlink(struct vm_area_struct *vma) anon_vma_free(anon_vma); } -static void anon_vma_ctor(struct kmem_cache *cachep, void *data) +static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; diff --git a/mm/shmem.c b/mm/shmem.c index 1089092aeca..952d361774b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2352,7 +2352,7 @@ static void shmem_destroy_inode(struct inode *inode) kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct shmem_inode_info *p = (struct shmem_inode_info *) foo; diff --git a/mm/slab.c b/mm/slab.c index 052e7d64537..918f04f7fef 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -406,7 +406,7 @@ struct kmem_cache { unsigned int dflags; /* dynamic flags */ /* constructor func */ - void (*ctor)(struct kmem_cache *, void *); + void (*ctor)(void *obj); /* 5) cache creation/removal */ const char *name; @@ -2137,8 +2137,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) */ struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, - unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + unsigned long flags, void (*ctor)(void *)) { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; @@ -2653,7 +2652,7 @@ static void cache_init_objs(struct 
kmem_cache *cachep, * They must also be threaded. */ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) - cachep->ctor(cachep, objp + obj_offset(cachep)); + cachep->ctor(objp + obj_offset(cachep)); if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) @@ -2669,7 +2668,7 @@ static void cache_init_objs(struct kmem_cache *cachep, cachep->buffer_size / PAGE_SIZE, 0); #else if (cachep->ctor) - cachep->ctor(cachep, objp); + cachep->ctor(objp); #endif slab_bufctl(slabp)[i] = i + 1; } @@ -3093,7 +3092,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, #endif objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) - cachep->ctor(cachep, objp); + cachep->ctor(objp); #if ARCH_SLAB_MINALIGN if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", diff --git a/mm/slob.c b/mm/slob.c index de268eb7ac7..d8fbd4d1bfa 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -525,12 +525,11 @@ struct kmem_cache { unsigned int size, align; unsigned long flags; const char *name; - void (*ctor)(struct kmem_cache *, void *); + void (*ctor)(void *); }; struct kmem_cache *kmem_cache_create(const char *name, size_t size, - size_t align, unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *c; @@ -575,7 +574,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) b = slob_new_page(flags, get_order(c->size), node); if (c->ctor) - c->ctor(c, b); + c->ctor(b); return b; } diff --git a/mm/slub.c b/mm/slub.c index 77c21cf53ff..b7e2cd5d82d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1012,7 +1012,7 @@ __setup("slub_debug", setup_slub_debug); static unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { /* * Enable debugging if selected on the kernel commandline. 
@@ -1040,7 +1040,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, static inline void add_full(struct kmem_cache_node *n, struct page *page) {} static inline unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { return flags; } @@ -1103,7 +1103,7 @@ static void setup_object(struct kmem_cache *s, struct page *page, { setup_object_debug(s, page, object); if (unlikely(s->ctor)) - s->ctor(s, object); + s->ctor(object); } static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -2286,7 +2286,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { memset(s, 0, kmem_size); s->name = name; @@ -3042,7 +3042,7 @@ static int slab_unmergeable(struct kmem_cache *s) static struct kmem_cache *find_mergeable(size_t size, size_t align, unsigned long flags, const char *name, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { struct kmem_cache *s; @@ -3082,8 +3082,7 @@ static struct kmem_cache *find_mergeable(size_t size, } struct kmem_cache *kmem_cache_create(const char *name, size_t size, - size_t align, unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *s; diff --git a/net/socket.c b/net/socket.c index 1310a82cbba..8ef8ba81b9e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -265,7 +265,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; diff --git a/net/sunrpc/rpc_pipe.c 
b/net/sunrpc/rpc_pipe.c index 5a9b0e7828c..23a2b8f6dc4 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -897,7 +897,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(struct kmem_cache * cachep, void *foo) +init_once(void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; -- cgit v1.2.3-70-g09d2 From e40f51a36a6ca718e829c0933ab1e79333ac932e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 26 Jul 2008 17:47:53 -0700 Subject: netfilter: ebtables: use nf_register_hooks() Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtable_filter.c | 18 +++++------------- net/bridge/netfilter/ebtable_nat.c | 18 +++++------------- 2 files changed, 10 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 690bc3ab186..1a58af51a2e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,28 +93,20 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __init ebtable_filter_init(void) { - int i, j, ret; + int ret; ret = ebt_register_table(&frame_filter); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - if ((ret = nf_register_hook(&ebt_ops_filter[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_filter[j]); - ebt_unregister_table(&frame_filter); + ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); + if (ret < 0) + ebt_unregister_table(&frame_filter); return ret; } static void __exit ebtable_filter_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - nf_unregister_hook(&ebt_ops_filter[i]); + nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); ebt_unregister_table(&frame_filter); } diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c 
index 5b495fe2d0b..f60c1e78e57 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -100,28 +100,20 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __init ebtable_nat_init(void) { - int i, ret, j; + int ret; ret = ebt_register_table(&frame_nat); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - if ((ret = nf_register_hook(&ebt_ops_nat[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_nat[j]); - ebt_unregister_table(&frame_nat); + ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); + if (ret < 0) + ebt_unregister_table(&frame_nat); return ret; } static void __exit ebtable_nat_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - nf_unregister_hook(&ebt_ops_nat[i]); + nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); ebt_unregister_table(&frame_nat); } -- cgit v1.2.3-70-g09d2 From f858b4869a9136dd28cc2ab37f8b89268cc99462 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 26 Jul 2008 17:48:38 -0700 Subject: netfilter: ip{,6}tables_security: fix future section mismatch Currently not visible, because NET_NS is mutually exclusive with SYSFS which is required by SECURITY. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 2b472ac2263..db6d312128e 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -32,7 +32,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index a07abee3049..6e7131036bc 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -31,7 +31,7 @@ static struct struct ip6t_replace repl; struct ip6t_standard entries[3]; struct ip6t_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, -- cgit v1.2.3-70-g09d2 From 3918fed5f31213067c1c345bd904e1ea369e6819 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 26 Jul 2008 17:48:59 -0700 Subject: netfilter: arptables in netns for real IN, FORWARD -- grab netns from in device, OUT -- from out device. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/arptable_filter.c | 39 +++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 3be4d07e7ed..082f5dd3156 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -55,32 +55,53 @@ static struct xt_table packet_filter = { }; /* The work comes in here from netfilter.c */ -static unsigned int arpt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int arpt_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter); + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); +} + +static unsigned int arpt_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.arptable_filter); +} + +static unsigned int arpt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { { - .hook = arpt_hook, + .hook = arpt_in_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_out_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_forward_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_FORWARD, -- cgit v1.2.3-70-g09d2 From 
93bc4e89c260d91576840c4881d1066d84ccd422 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 26 Jul 2008 17:49:33 -0700 Subject: netfilter: fix double-free and use-after free As suggested by Patrick McHardy, introduce a __krealloc() that doesn't free the original buffer to fix a double-free and use-after-free bug introduced by me in netfilter that uses RCU. Reported-by: Patrick McHardy Signed-off-by: Pekka Enberg Tested-by: Dieter Ries Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/slab.h | 1 + mm/util.c | 44 ++++++++++++++++++++++++++++--------- net/netfilter/nf_conntrack_extend.c | 2 +- 3 files changed, 36 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9aa90a6f20e..be6f1d40b66 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -96,6 +96,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); /* * Common kmalloc functions provided by all allocators */ +void * __must_check __krealloc(const void *, size_t, gfp_t); void * __must_check krealloc(const void *, size_t, gfp_t); void kfree(const void *); size_t ksize(const void *); diff --git a/mm/util.c b/mm/util.c index 8f18683825b..6ef9e9943f6 100644 --- a/mm/util.c +++ b/mm/util.c @@ -68,25 +68,22 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp) EXPORT_SYMBOL(kmemdup); /** - * krealloc - reallocate memory. The contents will remain unchanged. + * __krealloc - like krealloc() but don't free @p. * @p: object to reallocate memory for. * @new_size: how many bytes of memory are required. * @flags: the type of memory to allocate. * - * The contents of the object pointed to are preserved up to the - * lesser of the new and old sizes. If @p is %NULL, krealloc() - * behaves exactly like kmalloc(). If @size is 0 and @p is not a - * %NULL pointer, the object pointed to is freed. + * This function is like krealloc() except it never frees the originally + * allocated buffer. 
Use this if you don't want to free the buffer immediately + * like, for example, with RCU. */ -void *krealloc(const void *p, size_t new_size, gfp_t flags) +void *__krealloc(const void *p, size_t new_size, gfp_t flags) { void *ret; size_t ks = 0; - if (unlikely(!new_size)) { - kfree(p); + if (unlikely(!new_size)) return ZERO_SIZE_PTR; - } if (p) ks = ksize(p); @@ -95,10 +92,37 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) return (void *)p; ret = kmalloc_track_caller(new_size, flags); - if (ret && p) { + if (ret && p) memcpy(ret, p, ks); + + return ret; +} +EXPORT_SYMBOL(__krealloc); + +/** + * krealloc - reallocate memory. The contents will remain unchanged. + * @p: object to reallocate memory for. + * @new_size: how many bytes of memory are required. + * @flags: the type of memory to allocate. + * + * The contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. If @p is %NULL, krealloc() + * behaves exactly like kmalloc(). If @size is 0 and @p is not a + * %NULL pointer, the object pointed to is freed. 
+ */ +void *krealloc(const void *p, size_t new_size, gfp_t flags) +{ + void *ret; + + if (unlikely(!new_size)) { kfree(p); + return ZERO_SIZE_PTR; } + + ret = __krealloc(p, new_size, flags); + if (ret && p != ret) + kfree(p); + return ret; } EXPORT_SYMBOL(krealloc); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 3469bc71a38..c956ef7eeec 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -95,7 +95,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) newlen = newoff + t->len; rcu_read_unlock(); - new = krealloc(ct->ext, newlen, gfp); + new = __krealloc(ct->ext, newlen, gfp); if (!new) return NULL; -- cgit v1.2.3-70-g09d2 From 6c64825bf40ecc1b01610762ca736b18c8a9db92 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 26 Jul 2008 17:50:05 -0700 Subject: netfilter: nf_conntrack_extend: avoid unnecessary "ct->ext" dereferences As Linus points out, "ct->ext" and "new" are always equal, avoid unnecessary dereferences and use "new" directly. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/nf_conntrack_extend.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index c956ef7eeec..4b2c769d555 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -115,10 +115,10 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) ct->ext = new; } - ct->ext->offset[id] = newoff; - ct->ext->len = newlen; - memset((void *)ct->ext + newoff, 0, newlen - newoff); - return (void *)ct->ext + newoff; + new->offset[id] = newoff; + new->len = newlen; + memset((void *)new + newoff, 0, newlen - newoff); + return (void *)new + newoff; } EXPORT_SYMBOL(__nf_ct_ext_add); -- cgit v1.2.3-70-g09d2 From 6c3b8fc618905d7599dcc514c99ce4293d476f39 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 26 Jul 2008 17:51:06 -0700 Subject: netns: fix ip_rt_frag_needed rt_is_expired Running recent kernels, and using a particular vpn gateway, I've been having to edit my mails down to get them accepted by the smtp server. Git bisect led to commit e84f84f276473dcc673f360e8ff3203148bdf0e2 - netns: place rt_genid into struct net. The conversion from a != test to rt_is_expired() put one negative too many: and now my mail works. Signed-off-by: Hugh Dickins Acked-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e4ab0ac94f9..a507c5e27d0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1502,7 +1502,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->fl.iif != 0 || dst_metric_locked(&rth->u.dst, RTAX_MTU) || !net_eq(dev_net(rth->u.dst.dev), net) || - !rt_is_expired(rth)) + rt_is_expired(rth)) continue; if (new_mtu < 68 || new_mtu >= old_mtu) { -- cgit v1.2.3-70-g09d2 From 734550921e9b7ab924a43aa3d0bd4239dac4fbf1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Jul 2008 21:22:20 -0400 Subject: [PATCH] beginning of sysctl cleanup - ctl_table_set New object: set of sysctls [currently - root and per-net-ns]. Contains: pointer to parent set, list of tables and "should I see this set?" method (->is_seen(set)). Current lists of tables are subsumed by that; net-ns contains such a beast. ->lookup() for ctl_table_root returns pointer to ctl_table_set instead of that to ->list of that ctl_table_set. 
[folded compile fixes by rdd for configs without sysctl] Signed-off-by: Al Viro --- include/linux/sysctl.h | 15 +++++++++++++-- include/net/net_namespace.h | 4 +++- kernel/sysctl.c | 41 +++++++++++++++++++++++++++++++---------- net/sysctl_net.c | 22 ++++++++++------------ 4 files changed, 57 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 24141b4d1a1..c1e0cf408af 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -947,6 +947,16 @@ struct ctl_table; struct nsproxy; struct ctl_table_root; +struct ctl_table_set { + struct list_head list; + struct ctl_table_set *parent; + int (*is_seen)(struct ctl_table_set *); +}; + +extern void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)); + extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev); @@ -1049,8 +1059,8 @@ struct ctl_table struct ctl_table_root { struct list_head root_list; - struct list_head header_list; - struct list_head *(*lookup)(struct ctl_table_root *root, + struct ctl_table_set default_set; + struct ctl_table_set *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); int (*permissions)(struct ctl_table_root *root, struct nsproxy *namespaces, struct ctl_table *table); @@ -1066,6 +1076,7 @@ struct ctl_table_header struct completion *unregistering; struct ctl_table *ctl_table_arg; struct ctl_table_root *root; + struct ctl_table_set *set; }; /* struct ctl_path describes where in the hierarchy a table is added */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3855620b78a..a8eb43cf0c7 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -38,7 +38,9 @@ struct net { struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; - struct list_head 
sysctl_table_headers; +#ifdef CONFIG_SYSCTL + struct ctl_table_set sysctls; +#endif struct net_device *loopback_dev; /* The loopback */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 35a50db9b6c..8ee4a0619fb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -160,12 +160,13 @@ static struct ctl_table root_table[]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.header_list), + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, }; static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .header_list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), }; static struct ctl_table kern_table[]; @@ -1403,14 +1404,20 @@ void sysctl_head_finish(struct ctl_table_header *head) spin_unlock(&sysctl_lock); } +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + static struct list_head * lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) { - struct list_head *header_list; - header_list = &root->header_list; - if (root->lookup) - header_list = root->lookup(root, namespaces); - return header_list; + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; } struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, @@ -1720,7 +1727,6 @@ struct ctl_table_header *__register_sysctl_paths( struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table) { - struct list_head *header_list; struct ctl_table_header *header; struct ctl_table *new, **prevp; unsigned int n, npath; @@ -1772,8 +1778,8 
@@ struct ctl_table_header *__register_sysctl_paths( } #endif spin_lock(&sysctl_lock); - header_list = lookup_header_list(root, namespaces); - list_add_tail(&header->ctl_entry, header_list); + header->set = lookup_header_set(root, namespaces); + list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); return header; @@ -1832,6 +1838,15 @@ void unregister_sysctl_table(struct ctl_table_header * header) kfree(header); } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} + #else /* !CONFIG_SYSCTL */ struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { @@ -1848,6 +1863,12 @@ void unregister_sysctl_table(struct ctl_table_header * table) { } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ +} + #endif /* CONFIG_SYSCTL */ /* diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 63ada437fc2..cefbc367d8b 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -29,10 +29,15 @@ #include #endif -static struct list_head * +static struct ctl_table_set * net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) { - return &namespaces->net_ns->sysctl_table_headers; + return &namespaces->net_ns->sysctls; +} + +static int is_seen(struct ctl_table_set *set) +{ + return &current->nsproxy->net_ns->sysctls == set; } /* Return standard mode bits for table entry. 
*/ @@ -53,13 +58,6 @@ static struct ctl_table_root net_sysctl_root = { .permissions = net_ctl_permissions, }; -static LIST_HEAD(net_sysctl_ro_tables); -static struct list_head *net_ctl_ro_header_lookup(struct ctl_table_root *root, - struct nsproxy *namespaces) -{ - return &net_sysctl_ro_tables; -} - static int net_ctl_ro_header_perms(struct ctl_table_root *root, struct nsproxy *namespaces, struct ctl_table *table) { @@ -70,19 +68,18 @@ static int net_ctl_ro_header_perms(struct ctl_table_root *root, } static struct ctl_table_root net_sysctl_ro_root = { - .lookup = net_ctl_ro_header_lookup, .permissions = net_ctl_ro_header_perms, }; static int sysctl_net_init(struct net *net) { - INIT_LIST_HEAD(&net->sysctl_table_headers); + setup_sysctl_set(&net->sysctls, NULL, is_seen); return 0; } static void sysctl_net_exit(struct net *net) { - WARN_ON(!list_empty(&net->sysctl_table_headers)); + WARN_ON(!list_empty(&net->sysctls.list)); return; } @@ -98,6 +95,7 @@ static __init int sysctl_init(void) if (ret) goto out; register_sysctl_root(&net_sysctl_root); + setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); register_sysctl_root(&net_sysctl_ro_root); out: return ret; -- cgit v1.2.3-70-g09d2 From bd7b1533cd6a68c734062aa69394bec7e2b1718e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 16:00:59 -0400 Subject: [PATCH] sysctl: make sure that /proc/sys/net/ipv4 appears before per-ns ones Massage ipv4 initialization - make sure that net.ipv4 appears as non-per-net-namespace before it shows up in per-net-namespace sysctls. That's the only change outside of sysctl.c needed to get sane ordering rules and data structures for sysctls (esp. for procfs side of that mess). 
Signed-off-by: Al Viro --- include/net/ip.h | 2 ++ net/ipv4/af_inet.c | 4 ++++ net/ipv4/sysctl_net_ipv4.c | 7 +++++++ 3 files changed, 13 insertions(+) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index b5862b97520..250e6ef025a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -188,6 +188,8 @@ extern int sysctl_ip_dynaddr; extern void ipfrag_init(void); +extern void ip_static_sysctl_init(void); + #ifdef CONFIG_INET #include diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f440a9f5492..354f6b54e49 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1439,6 +1439,10 @@ static int __init inet_init(void) (void)sock_register(&inet_family_ops); +#ifdef CONFIG_SYSCTL + ip_static_sysctl_init(); +#endif + /* * Add all the base protocols. */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 14ef202a225..d63e9388d92 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -882,4 +882,11 @@ static __init int sysctl_ipv4_init(void) return 0; } +/* set enough of tree skeleton to get rid of ordering problems */ +void __init ip_static_sysctl_init(void) +{ + static ctl_table table[1]; + register_sysctl_paths(net_ipv4_ctl_path, table); +} + __initcall(sysctl_ipv4_init); -- cgit v1.2.3-70-g09d2 From 516e0cc5646f377ab80fcc2ee639892eccb99853 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Jul 2008 00:39:17 -0400 Subject: [PATCH] f_count may wrap around make it atomic_long_t; while we are at it, get rid of useless checks in affs, hfs and hpfs - ->open() always has it equal to 1, ->release() - to 0. 
Signed-off-by: Al Viro --- drivers/net/ppp_generic.c | 6 +++--- fs/affs/file.c | 4 ---- fs/aio.c | 6 +++--- fs/file_table.c | 10 +++++----- fs/hfs/inode.c | 4 ---- fs/hfsplus/inode.c | 4 ---- include/linux/fs.h | 6 +++--- include/net/af_unix.h | 2 +- net/sched/sch_atm.c | 4 ++-- net/unix/af_unix.c | 2 +- net/unix/garbage.c | 18 +++++++++--------- 11 files changed, 27 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 739b3ab7bcc..ddccc074a76 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -581,12 +581,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (file == ppp->owner) ppp_shutdown_interface(ppp); } - if (atomic_read(&file->f_count) <= 2) { + if (atomic_long_read(&file->f_count) <= 2) { ppp_release(NULL, file); err = 0; } else - printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%d\n", - atomic_read(&file->f_count)); + printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n", + atomic_long_read(&file->f_count)); unlock_kernel(); return err; } diff --git a/fs/affs/file.c b/fs/affs/file.c index 6eac7bdeec9..1377b1240b6 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = { static int affs_file_open(struct inode *inode, struct file *filp) { - if (atomic_read(&filp->f_count) != 1) - return 0; pr_debug("AFFS: open(%lu,%d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); atomic_inc(&AFFS_I(inode)->i_opencnt); @@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp) static int affs_file_release(struct inode *inode, struct file *filp) { - if (atomic_read(&filp->f_count) != 0) - return 0; pr_debug("AFFS: release(%lu, %d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); diff --git a/fs/aio.c b/fs/aio.c index 0051fd94b44..f658441d566 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data) 
*/ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n", - req, atomic_read(&req->ki_filp->f_count)); + dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", + req, atomic_long_read(&req->ki_filp->f_count)); assert_spin_locked(&ctx->ctx_lock); @@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. */ - if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/file_table.c b/fs/file_table.c index 83084225b4c..f45a4493f9e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -120,7 +120,7 @@ struct file *get_empty_filp(void) tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); - atomic_set(&f->f_count, 1); + atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); f->f_uid = tsk->fsuid; f->f_gid = tsk->fsgid; @@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file); void fput(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) + if (atomic_long_dec_and_test(&file->f_count)) __fput(file); } @@ -294,7 +294,7 @@ struct file *fget(unsigned int fd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!atomic_inc_not_zero(&file->f_count)) { + if (!atomic_long_inc_not_zero(&file->f_count)) { /* File object ref couldn't be taken */ rcu_read_unlock(); return NULL; @@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (atomic_inc_not_zero(&file->f_count)) + if (atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) void put_filp(struct file *file) { - if 
(atomic_dec_and_test(&file->f_count)) { + if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index aa73f3fd5dd..7e19835efa2 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -522,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file) { if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; - if (atomic_read(&file->f_count) != 1) - return 0; atomic_inc(&HFS_I(inode)->opencnt); return 0; } @@ -534,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file) if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; - if (atomic_read(&file->f_count) != 0) - return 0; if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { mutex_lock(&inode->i_mutex); hfs_file_truncate(inode); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d4014e3044d..b085d64a2b6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode).rsrc_inode; - if (atomic_read(&file->f_count) != 1) - return 0; atomic_inc(&HFSPLUS_I(inode).opencnt); return 0; } @@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode).rsrc_inode; - if (atomic_read(&file->f_count) != 0) - return 0; if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { mutex_lock(&inode->i_mutex); hfsplus_file_truncate(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9d2de4cadab..7676fa1c20a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -795,7 +795,7 @@ struct file { #define f_dentry f_path.dentry #define f_vfsmnt f_path.mnt const struct file_operations *f_op; - atomic_t f_count; + atomic_long_t f_count; unsigned int f_flags; mode_t f_mode; loff_t f_pos; @@ -824,8 +824,8 @@ extern spinlock_t files_lock; #define file_list_lock() 
spin_lock(&files_lock); #define file_list_unlock() spin_unlock(&files_lock); -#define get_file(x) atomic_inc(&(x)->f_count) -#define file_count(x) atomic_read(&(x)->f_count) +#define get_file(x) atomic_long_inc(&(x)->f_count) +#define file_count(x) atomic_long_read(&(x)->f_count) #ifdef CONFIG_DEBUG_WRITECOUNT static inline void file_take_write(struct file *f) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2dfa96b0575..7dd29b7e461 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -51,7 +51,7 @@ struct unix_sock { struct sock *peer; struct sock *other; struct list_head link; - atomic_t inflight; + atomic_long_t inflight; spinlock_t lock; unsigned int gc_candidate : 1; wait_queue_head_t peer_wait; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 04faa835be1..6b517b9dac5 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -162,7 +162,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) qdisc_destroy(flow->q); tcf_destroy_chain(&flow->filter_list); if (flow->sock) { - pr_debug("atm_tc_put: f_count %d\n", + pr_debug("atm_tc_put: f_count %ld\n", file_count(flow->sock->file)); flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); @@ -259,7 +259,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, sock = sockfd_lookup(fd, &error); if (!sock) return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file)); + pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { error = -EPROTOTYPE; goto err_out; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 70ceb1604ad..6e7fec74bdb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -603,7 +603,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) u->dentry = NULL; u->mnt = NULL; spin_lock_init(&u->lock); - atomic_set(&u->inflight, 0); + atomic_long_set(&u->inflight, 0); 
INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ebdff3d877a..2a27b84f740 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -127,7 +127,7 @@ void unix_inflight(struct file *fp) if(s) { struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); - if (atomic_inc_return(&u->inflight) == 1) { + if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { @@ -145,7 +145,7 @@ void unix_notinflight(struct file *fp) struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); - if (atomic_dec_and_test(&u->inflight)) + if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; spin_unlock(&unix_gc_lock); @@ -237,17 +237,17 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_dec(&usk->inflight); + atomic_long_dec(&usk->inflight); } static void inc_inflight(struct unix_sock *usk) { - atomic_inc(&usk->inflight); + atomic_long_inc(&usk->inflight); } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_inc(&u->inflight); + atomic_long_inc(&u->inflight); /* * If this is still a candidate, move it to the end of the * list, so that it's checked even if it was already passed @@ -288,11 +288,11 @@ void unix_gc(void) * before the detach without atomicity guarantees. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { - int total_refs; - int inflight_refs; + long total_refs; + long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_read(&u->inflight); + inflight_refs = atomic_long_read(&u->inflight); BUG_ON(inflight_refs < 1); BUG_ON(total_refs < inflight_refs); @@ -324,7 +324,7 @@ void unix_gc(void) /* Move cursor to after the current position. 
*/ list_move(&cursor, &u->link); - if (atomic_read(&u->inflight) > 0) { + if (atomic_long_read(&u->inflight) > 0) { list_move_tail(&u->link, &gc_inflight_list); u->gc_candidate = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); -- cgit v1.2.3-70-g09d2 From 2c3abab7c95295f319dc8899b74cbd60140fcdfb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 27 Jul 2008 03:59:24 -0700 Subject: ipcomp: Fix warnings after ipcomp consolidation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/ipv4/ipcomp.c: In function ‘ipcomp4_init_state’: net/ipv4/ipcomp.c:109: warning: unused variable ‘calg_desc’ net/ipv4/ipcomp.c:108: warning: unused variable ‘ipcd’ net/ipv4/ipcomp.c:107: warning: ‘err’ may be used uninitialized in this function net/ipv6/ipcomp6.c: In function ‘ipcomp6_init_state’: net/ipv6/ipcomp6.c:139: warning: unused variable ‘calg_desc’ net/ipv6/ipcomp6.c:138: warning: unused variable ‘ipcd’ net/ipv6/ipcomp6.c:137: warning: ‘err’ may be used uninitialized in this function Signed-off-by: David S. 
Miller --- net/ipv4/ipcomp.c | 4 +--- net/ipv6/ipcomp6.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a42b64d040c..38ccb6dfb02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -104,9 +104,7 @@ out: static int ipcomp4_init_state(struct xfrm_state *x) { - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 0cfcea42153..4545e430686 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -134,9 +134,7 @@ out: static int ipcomp6_init_state(struct xfrm_state *x) { - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { -- cgit v1.2.3-70-g09d2 From 6f9f489a4eeaa3c8a8618e078a5270d2c4872b67 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Jul 2008 04:40:51 -0700 Subject: net: missing bits of net-namespace / sysctl Piss-poor sysctl registration API strikes again, film at 11... What we really need is _pathname_ required to be present in already registered table, so that kernel could warn about bad order. That's the next target for sysctl stuff (and generally saner and more explicit order of initialization of ipv[46] internals wouldn't hurt either). For the time being, here are full fixups required by ..._rotable() stuff; we make per-net sysctl sets descendents of "ro" one and make sure that sufficient skeleton is there before we start registering per-net sysctls. Signed-off-by: Al Viro Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 2 ++ include/net/route.h | 2 -- net/ipv4/route.c | 11 ++++++++++- net/ipv4/sysctl_net_ipv4.c | 14 -------------- net/ipv6/af_inet6.c | 12 ++++++++++++ net/ipv6/sysctl_net_ipv6.c | 16 ++++++++++++++++ net/sysctl_net.c | 4 +++- 7 files changed, 43 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2d5c18514a2..113028fb8f6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -608,6 +608,8 @@ extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); extern struct ctl_table *ipv6_route_sysctl_init(struct net *net); extern int ipv6_sysctl_register(void); extern void ipv6_sysctl_unregister(void); +extern int ipv6_static_sysctl_register(void); +extern void ipv6_static_sysctl_unregister(void); #endif #endif /* __KERNEL__ */ diff --git a/include/net/route.h b/include/net/route.h index 3140cc50085..4f0d8c14736 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -204,6 +204,4 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } -extern ctl_table ipv4_route_table[]; - #endif /* _ROUTE_H */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a507c5e27d0..380d6474cf6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2914,7 +2914,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, return 0; } -ctl_table ipv4_route_table[] = { +static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", @@ -3216,6 +3216,15 @@ int __init ip_rt_init(void) return rc; } +/* + * We really need to sanitize the damn ipv4 init order, then all + * this nonsense will go away. 
+ */ +void __init ip_static_sysctl_init(void) +{ + register_sysctl_paths(ipv4_route_path, ipv4_route_table); +} + EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d63e9388d92..770d827f5ab 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -401,13 +401,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, }, - { - .ctl_name = NET_IPV4_ROUTE, - .procname = "route", - .maxlen = 0, - .mode = 0555, - .child = ipv4_route_table - }, #ifdef CONFIG_IP_MULTICAST { .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, @@ -882,11 +875,4 @@ static __init int sysctl_ipv4_init(void) return 0; } -/* set enough of tree skeleton to get rid of ordering problems */ -void __init ip_static_sysctl_init(void) -{ - static ctl_table table[1]; - register_sysctl_paths(net_ipv4_ctl_path, table); -} - __initcall(sysctl_ipv4_init); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c708ca84229..95055f8c3f3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -934,6 +934,11 @@ static int __init inet6_init(void) if (err) goto out_unregister_sock; +#ifdef CONFIG_SYSCTL + err = ipv6_static_sysctl_register(); + if (err) + goto static_sysctl_fail; +#endif /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -1058,6 +1063,10 @@ ipmr_fail: icmp_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +static_sysctl_fail: +#endif cleanup_ipv6_mibs(); out_unregister_sock: sock_unregister(PF_INET6); @@ -1113,6 +1122,9 @@ static void __exit inet6_exit(void) rawv6_exit(); unregister_pernet_subsys(&inet6_net_ops); +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +#endif cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); 
proto_unregister(&udplitev6_prot); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 5c99274558b..e6dfaeac6be 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -150,3 +150,19 @@ void ipv6_sysctl_unregister(void) unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); } + +static struct ctl_table_header *ip6_base; + +int ipv6_static_sysctl_register(void) +{ + static struct ctl_table empty[1]; + ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty); + if (ip6_base == NULL) + return -ENOMEM; + return 0; +} + +void ipv6_static_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_base); +} diff --git a/net/sysctl_net.c b/net/sysctl_net.c index cefbc367d8b..972201cd5fa 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -73,7 +73,9 @@ static struct ctl_table_root net_sysctl_ro_root = { static int sysctl_net_init(struct net *net) { - setup_sysctl_set(&net->sysctls, NULL, is_seen); + setup_sysctl_set(&net->sysctls, + &net_sysctl_ro_root.default_set, + is_seen); return 0; } -- cgit v1.2.3-70-g09d2 From eeb61f719c00c626115852bbc91189dc3011a844 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Jul 2008 08:59:33 +0100 Subject: missing bits of net-namespace / sysctl Piss-poor sysctl registration API strikes again, film at 11... What we really need is _pathname_ required to be present in already registered table, so that kernel could warn about bad order. That's the next target for sysctl stuff (and generally saner and more explicit order of initialization of ipv[46] internals wouldn't hurt either). For the time being, here are full fixups required by ..._rotable() stuff; we make per-net sysctl sets descendents of "ro" one and make sure that sufficient skeleton is there before we start registering per-net sysctls. 
Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/net/ipv6.h | 2 ++ include/net/route.h | 2 -- net/ipv4/route.c | 11 ++++++++++- net/ipv4/sysctl_net_ipv4.c | 14 -------------- net/ipv6/af_inet6.c | 12 ++++++++++++ net/ipv6/sysctl_net_ipv6.c | 16 ++++++++++++++++ net/sysctl_net.c | 4 +++- 7 files changed, 43 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2d5c18514a2..113028fb8f6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -608,6 +608,8 @@ extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); extern struct ctl_table *ipv6_route_sysctl_init(struct net *net); extern int ipv6_sysctl_register(void); extern void ipv6_sysctl_unregister(void); +extern int ipv6_static_sysctl_register(void); +extern void ipv6_static_sysctl_unregister(void); #endif #endif /* __KERNEL__ */ diff --git a/include/net/route.h b/include/net/route.h index 3140cc50085..4f0d8c14736 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -204,6 +204,4 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } -extern ctl_table ipv4_route_table[]; - #endif /* _ROUTE_H */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a507c5e27d0..380d6474cf6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2914,7 +2914,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, return 0; } -ctl_table ipv4_route_table[] = { +static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", @@ -3216,6 +3216,15 @@ int __init ip_rt_init(void) return rc; } +/* + * We really need to sanitize the damn ipv4 init order, then all + * this nonsense will go away. 
+ */ +void __init ip_static_sysctl_init(void) +{ + register_sysctl_paths(ipv4_route_path, ipv4_route_table); +} + EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d63e9388d92..770d827f5ab 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -401,13 +401,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, }, - { - .ctl_name = NET_IPV4_ROUTE, - .procname = "route", - .maxlen = 0, - .mode = 0555, - .child = ipv4_route_table - }, #ifdef CONFIG_IP_MULTICAST { .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, @@ -882,11 +875,4 @@ static __init int sysctl_ipv4_init(void) return 0; } -/* set enough of tree skeleton to get rid of ordering problems */ -void __init ip_static_sysctl_init(void) -{ - static ctl_table table[1]; - register_sysctl_paths(net_ipv4_ctl_path, table); -} - __initcall(sysctl_ipv4_init); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c708ca84229..95055f8c3f3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -934,6 +934,11 @@ static int __init inet6_init(void) if (err) goto out_unregister_sock; +#ifdef CONFIG_SYSCTL + err = ipv6_static_sysctl_register(); + if (err) + goto static_sysctl_fail; +#endif /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -1058,6 +1063,10 @@ ipmr_fail: icmp_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +static_sysctl_fail: +#endif cleanup_ipv6_mibs(); out_unregister_sock: sock_unregister(PF_INET6); @@ -1113,6 +1122,9 @@ static void __exit inet6_exit(void) rawv6_exit(); unregister_pernet_subsys(&inet6_net_ops); +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +#endif cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); 
proto_unregister(&udplitev6_prot); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 5c99274558b..e6dfaeac6be 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -150,3 +150,19 @@ void ipv6_sysctl_unregister(void) unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); } + +static struct ctl_table_header *ip6_base; + +int ipv6_static_sysctl_register(void) +{ + static struct ctl_table empty[1]; + ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty); + if (ip6_base == NULL) + return -ENOMEM; + return 0; +} + +void ipv6_static_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_base); +} diff --git a/net/sysctl_net.c b/net/sysctl_net.c index cefbc367d8b..972201cd5fa 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -73,7 +73,9 @@ static struct ctl_table_root net_sysctl_ro_root = { static int sysctl_net_init(struct net *net) { - setup_sysctl_set(&net->sysctls, NULL, is_seen); + setup_sysctl_set(&net->sysctls, + &net_sysctl_ro_root.default_set, + is_seen); return 0; } -- cgit v1.2.3-70-g09d2