aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/flex_array.h3
-rw-r--r--include/linux/reciprocal_div.h39
-rw-r--r--include/linux/slab_def.h4
-rw-r--r--include/net/red.h3
4 files changed, 28 insertions, 21 deletions
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 6843cf193a4..b6efb0c6440 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -2,6 +2,7 @@
#define _FLEX_ARRAY_H
#include <linux/types.h>
+#include <linux/reciprocal_div.h>
#include <asm/page.h>
#define FLEX_ARRAY_PART_SIZE PAGE_SIZE
@@ -22,7 +23,7 @@ struct flex_array {
int element_size;
int total_nr_elements;
int elems_per_part;
- u32 reciprocal_elems;
+ struct reciprocal_value reciprocal_elems;
struct flex_array_part *parts[];
};
/*
diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
index f9c90b33285..8c5a3fb6c6c 100644
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
@@ -4,29 +4,32 @@
#include <linux/types.h>
/*
- * This file describes reciprocical division.
+ * This algorithm is based on the paper "Division by Invariant
+ * Integers Using Multiplication" by Torbjörn Granlund and Peter
+ * L. Montgomery.
*
- * This optimizes the (A/B) problem, when A and B are two u32
- * and B is a known value (but not known at compile time)
+ * The assembler implementation from Agner Fog, which this code is
+ * based on, can be found here:
+ * http://www.agner.org/optimize/asmlib.zip
*
- * The math principle used is :
- * Let RECIPROCAL_VALUE(B) be (((1LL << 32) + (B - 1))/ B)
- * Then A / B = (u32)(((u64)(A) * (R)) >> 32)
- *
- * This replaces a divide by a multiply (and a shift), and
- * is generally less expensive in CPU cycles.
+ * This optimization for A/B is helpful if the divisor B is mostly
+ * runtime invariant. The reciprocal of B is calculated in the
+ * slow-path with reciprocal_value(). The fast-path can then just use
+ * a much faster multiplication operation with a variable dividend A
+ * to calculate the division A/B.
*/
-/*
- * Computes the reciprocal value (R) for the value B of the divisor.
- * Should not be called before each reciprocal_divide(),
- * or else the performance is slower than a normal divide.
- */
-extern u32 reciprocal_value(u32 B);
+struct reciprocal_value {
+ u32 m;
+ u8 sh1, sh2;
+};
+struct reciprocal_value reciprocal_value(u32 d);
-static inline u32 reciprocal_divide(u32 A, u32 R)
+static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
{
- return (u32)(((u64)A * R) >> 32);
+ u32 t = (u32)(((u64)a * R.m) >> 32);
+ return (t + ((a - t) >> R.sh1)) >> R.sh2;
}
-#endif
+
+#endif /* _LINUX_RECIPROCAL_DIV_H */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 09bfffb08a5..96e8abae19a 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_SLAB_DEF_H
#define _LINUX_SLAB_DEF_H
+#include <linux/reciprocal_div.h>
+
/*
* Definitions unique to the original Linux SLAB allocator.
*/
@@ -12,7 +14,7 @@ struct kmem_cache {
unsigned int shared;
unsigned int size;
- u32 reciprocal_buffer_size;
+ struct reciprocal_value reciprocal_buffer_size;
/* 2) touched by every alloc & free from the backend */
unsigned int flags; /* constant flags */
diff --git a/include/net/red.h b/include/net/red.h
index 168bb2f495f..76e0b5f922c 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -130,7 +130,8 @@ struct red_parms {
u32 qth_max; /* Max avg length threshold: Wlog scaled */
u32 Scell_max;
u32 max_P; /* probability, [0 .. 1.0] 32 scaled */
- u32 max_P_reciprocal; /* reciprocal_value(max_P / qth_delta) */
+ /* reciprocal_value(max_P / qth_delta) */
+ struct reciprocal_value max_P_reciprocal;
u32 qth_delta; /* max_th - min_th */
u32 target_min; /* min_th + 0.4*(max_th - min_th) */
u32 target_max; /* min_th + 0.6*(max_th - min_th) */