aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/route.c140
3 files changed, 101 insertions, 43 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 2daf096dfc6..fb62c590360 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -46,6 +46,7 @@ struct fib_config {
};
struct fib_info;
+struct rtable;
struct fib_nh_exception {
struct fib_nh_exception __rcu *fnhe_next;
@@ -80,6 +81,7 @@ struct fib_nh {
__be32 nh_gw;
__be32 nh_saddr;
int nh_saddr_genid;
+ struct rtable *nh_rth_output;
struct fnhe_hash_bucket *nh_exceptions;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57d768240..83d0f42b619 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,6 +171,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
+ if (nexthop_nh->nh_rth_output)
+ dst_release(&nexthop_nh->nh_rth_output->dst);
} endfor_nexthops(fi);
release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d52f7699c2f..8a0260010ea 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1158,8 +1158,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu;
}
-static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
- struct fib_info *fi)
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
{
if (fi->fib_metrics != (u32 *) dst_default_metrics) {
rt->fi = fi;
@@ -1168,50 +1167,83 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
}
-static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr)
+static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
struct fib_nh_exception *fnhe;
u32 hval;
+ if (!hash)
+ return NULL;
+
hval = fnhe_hashfun(daddr);
-restart:
for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
- if (daddr != fnhe_daddr)
- continue;
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (fnhe->fnhe_daddr == daddr)
+ return fnhe;
+ }
+ return NULL;
+}
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
- }
- }
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
+static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
+ __be32 daddr)
+{
+ __be32 fnhe_daddr, gw;
+ unsigned long expires;
+ unsigned int seq;
+ u32 pmtu;
+
+restart:
+ seq = read_seqbegin(&fnhe_seqlock);
+ fnhe_daddr = fnhe->fnhe_daddr;
+ gw = fnhe->fnhe_gw;
+ pmtu = fnhe->fnhe_pmtu;
+ expires = fnhe->fnhe_expires;
+ if (read_seqretry(&fnhe_seqlock, seq))
+ goto restart;
+
+ if (daddr != fnhe_daddr)
+ return;
+
+ if (pmtu) {
+ unsigned long diff = expires - jiffies;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = pmtu;
+ dst_set_expires(&rt->dst, diff);
}
- fnhe->fnhe_stamp = jiffies;
- break;
+ }
+ if (gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = gw;
+ }
+ fnhe->fnhe_stamp = jiffies;
+}
+
+static inline void rt_release_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+ dst_release(dst);
+}
+
+static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+{
+ struct rtable *orig, *prev, **p = &nh->nh_rth_output;
+
+ orig = *p;
+
+ prev = cmpxchg(p, orig, rt);
+ if (prev == orig) {
+ dst_clone(&rt->dst);
+ if (orig)
+ call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu);
}
}
-static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
+static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
const struct fib_result *res,
+ struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag)
{
if (fi) {
@@ -1219,12 +1251,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(nh->nh_exceptions))
- rt_bind_exception(rt, nh, fl4->daddr);
- rt_init_metrics(rt, fl4, fi);
+ if (unlikely(fnhe))
+ rt_bind_exception(rt, fnhe, daddr);
+ rt_init_metrics(rt, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
- rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+ rt->dst.tclassid = nh->nh_tclassid;
#endif
+ if (!(rt->dst.flags & DST_HOST) &&
+ rt_is_output_route(rt))
+ rt_cache_route(nh, rt);
}
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -1236,10 +1271,10 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
}
static struct rtable *rt_dst_alloc(struct net_device *dev,
- bool nopolicy, bool noxfrm)
+ bool nopolicy, bool noxfrm, bool will_cache)
{
return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
- DST_HOST | DST_NOCACHE |
+ (will_cache ? 0 : DST_HOST) | DST_NOCACHE |
(nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
}
@@ -1276,7 +1311,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto e_err;
}
rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
goto e_nobufs;
@@ -1349,6 +1384,7 @@ static int __mkroute_input(struct sk_buff *skb,
__be32 daddr, __be32 saddr, u32 tos,
struct rtable **result)
{
+ struct fib_nh_exception *fnhe;
struct rtable *rth;
int err;
struct in_device *out_dev;
@@ -1395,9 +1431,13 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
+ fnhe = NULL;
+ if (res->fi)
+ fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+
rth = rt_dst_alloc(out_dev->dev,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(out_dev, NOXFRM));
+ IN_DEV_CONF_GET(out_dev, NOXFRM), false);
if (!rth) {
err = -ENOBUFS;
goto cleanup;
@@ -1416,7 +1456,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
- rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
+ rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
*result = rth;
err = 0;
@@ -1558,7 +1598,7 @@ brd_input:
local_input:
rth = rt_dst_alloc(net->loopback_dev,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
goto e_nobufs;
@@ -1672,6 +1712,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
unsigned int flags)
{
struct fib_info *fi = res->fi;
+ struct fib_nh_exception *fnhe;
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
@@ -1710,9 +1751,22 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fi = NULL;
}
+ fnhe = NULL;
+ if (fi) {
+ fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
+ if (!fnhe) {
+ rth = FIB_RES_NH(*res).nh_rth_output;
+ if (rth &&
+ rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) {
+ dst_use(&rth->dst, jiffies);
+ return rth;
+ }
+ }
+ }
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(in_dev, NOXFRM));
+ IN_DEV_CONF_GET(in_dev, NOXFRM),
+ fi && !fnhe);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1749,7 +1803,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
#endif
}
- rt_set_nexthop(rth, fl4, res, fi, type, 0);
+ rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
rth->dst.flags |= DST_NOCACHE;