aboutsummaryrefslogtreecommitdiff
path: root/net/core/sock.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/sock.c')
-rw-r--r--net/core/sock.c472
1 files changed, 342 insertions, 130 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 8a146cfcc36..026e01f7027 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -93,6 +93,7 @@
#include <linux/capability.h>
#include <linux/errno.h>
+#include <linux/errqueue.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
@@ -139,9 +140,60 @@
#include <net/tcp.h>
#endif
+#include <net/busy_poll.h>
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+/**
+ * sk_ns_capable - General socket capability test
+ * @sk: Socket to use a capability on or through
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in the user
+ * namespace @user_ns.
+ */
+bool sk_ns_capable(const struct sock *sk,
+ struct user_namespace *user_ns, int cap)
+{
+ return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
+ ns_capable(user_ns, cap);
+}
+EXPORT_SYMBOL(sk_ns_capable);
+
+/**
+ * sk_capable - Socket global capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The global capbility to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in all user
+ * namespaces.
+ */
+bool sk_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(sk_capable);
+
+/**
+ * sk_net_capable - Network namespace socket capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socke was created
+ * and the current process has the capability @cap over the network namespace
+ * the socket is a member of.
+ */
+bool sk_net_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(sk_net_capable);
+
+
#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
@@ -186,8 +238,10 @@ void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
+#if defined(CONFIG_MEMCG_KMEM)
struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);
+#endif
/*
* Make lock validator output more readable. (we pre-construct these
@@ -208,7 +262,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_NFC" , "sk_lock-AF_MAX"
+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -224,7 +278,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_NFC" , "slock-AF_MAX"
+ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -240,7 +294,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_NFC" , "clock-AF_MAX"
+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
};
/*
@@ -423,7 +477,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
spin_unlock_irqrestore(&list->lock, flags);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb_len);
+ sk->sk_data_ready(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
@@ -470,12 +524,6 @@ discard_and_relse:
}
EXPORT_SYMBOL(sk_receive_skb);
-void sk_reset_txq(struct sock *sk)
-{
- sk_tx_queue_clear(sk);
-}
-EXPORT_SYMBOL(sk_reset_txq);
-
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = __sk_dst_get(sk);
@@ -505,7 +553,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
-static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
+static int sock_setbindtodevice(struct sock *sk, char __user *optval,
+ int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -515,7 +564,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
/* Sorry... */
ret = -EPERM;
- if (!capable(CAP_NET_RAW))
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
goto out;
ret = -EINVAL;
@@ -562,6 +611,46 @@ out:
return ret;
}
+static int sock_getbindtodevice(struct sock *sk, char __user *optval,
+ int __user *optlen, int len)
+{
+ int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ struct net *net = sock_net(sk);
+ char devname[IFNAMSIZ];
+
+ if (sk->sk_bound_dev_if == 0) {
+ len = 0;
+ goto zero;
+ }
+
+ ret = -EINVAL;
+ if (len < IFNAMSIZ)
+ goto out;
+
+ ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
+ if (ret)
+ goto out;
+
+ len = strlen(devname) + 1;
+
+ ret = -EFAULT;
+ if (copy_to_user(optval, devname, len))
+ goto out;
+
+zero:
+ ret = -EFAULT;
+ if (put_user(len, optlen))
+ goto out;
+
+ ret = 0;
+
+out:
+#endif
+
+ return ret;
+}
+
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
if (valbool)
@@ -589,7 +678,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
*/
if (optname == SO_BINDTODEVICE)
- return sock_bindtodevice(sk, optval, optlen);
+ return sock_setbindtodevice(sk, optval, optlen);
if (optlen < sizeof(int))
return -EINVAL;
@@ -611,6 +700,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
case SO_REUSEADDR:
sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
break;
+ case SO_REUSEPORT:
+ sk->sk_reuseport = valbool;
+ break;
case SO_TYPE:
case SO_PROTOCOL:
case SO_DOMAIN:
@@ -692,11 +784,12 @@ set_rcvbuf:
break;
case SO_NO_CHECK:
- sk->sk_no_check = valbool;
+ sk->sk_no_check_tx = valbool;
break;
case SO_PRIORITY:
- if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+ if ((val >= 0 && val <= 6) ||
+ ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
sk->sk_priority = val;
else
ret = -EPERM;
@@ -806,6 +899,13 @@ set_rcvbuf:
ret = sk_detach_filter(sk);
break;
+ case SO_LOCK_FILTER:
+ if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
+ ret = -EPERM;
+ else
+ sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
+ break;
+
case SO_PASSSEC:
if (valbool)
set_bit(SOCK_PASSSEC, &sock->flags);
@@ -813,7 +913,7 @@ set_rcvbuf:
clear_bit(SOCK_PASSSEC, &sock->flags);
break;
case SO_MARK:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
ret = -EPERM;
else
sk->sk_mark = val;
@@ -831,7 +931,7 @@ set_rcvbuf:
case SO_PEEK_OFF:
if (sock->ops->set_peek_off)
- sock->ops->set_peek_off(sk, val);
+ ret = sock->ops->set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
@@ -840,6 +940,30 @@ set_rcvbuf:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
+ case SO_SELECT_ERR_QUEUE:
+ sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
+ break;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_BUSY_POLL:
+ /* allow unprivileged users to decrease the value */
+ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
+ ret = -EPERM;
+ else {
+ if (val < 0)
+ ret = -EINVAL;
+ else
+ sk->sk_ll_usec = val;
+ }
+ break;
+#endif
+
+ case SO_MAX_PACING_RATE:
+ sk->sk_max_pacing_rate = val;
+ sk->sk_pacing_rate = min(sk->sk_pacing_rate,
+ sk->sk_max_pacing_rate);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -850,8 +974,8 @@ set_rcvbuf:
EXPORT_SYMBOL(sock_setsockopt);
-void cred_to_ucred(struct pid *pid, const struct cred *cred,
- struct ucred *ucred)
+static void cred_to_ucred(struct pid *pid, const struct cred *cred,
+ struct ucred *ucred)
{
ucred->pid = pid_vnr(pid);
ucred->uid = ucred->gid = -1;
@@ -862,7 +986,6 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred,
ucred->gid = from_kgid_munged(current_ns, cred->egid);
}
}
-EXPORT_SYMBOL_GPL(cred_to_ucred);
int sock_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
@@ -910,6 +1033,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_reuse;
break;
+ case SO_REUSEPORT:
+ v.val = sk->sk_reuseport;
+ break;
+
case SO_KEEPALIVE:
v.val = sock_flag(sk, SOCK_KEEPOPEN);
break;
@@ -937,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_NO_CHECK:
- v.val = sk->sk_no_check;
+ v.val = sk->sk_no_check_tx;
break;
case SO_PRIORITY:
@@ -1074,6 +1201,39 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
break;
+
+ case SO_BINDTODEVICE:
+ return sock_getbindtodevice(sk, optval, optlen, len);
+
+ case SO_GET_FILTER:
+ len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+ if (len < 0)
+ return len;
+
+ goto lenout;
+
+ case SO_LOCK_FILTER:
+ v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
+ break;
+
+ case SO_BPF_EXTENSIONS:
+ v.val = bpf_tell_extensions();
+ break;
+
+ case SO_SELECT_ERR_QUEUE:
+ v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
+ break;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_BUSY_POLL:
+ v.val = sk->sk_ll_usec;
+ break;
+#endif
+
+ case SO_MAX_PACING_RATE:
+ v.val = sk->sk_max_pacing_rate;
+ break;
+
default:
return -ENOPROTOOPT;
}
@@ -1123,18 +1283,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
#endif
}
-/*
- * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
- * un-modified. Special care is taken when initializing object to zero.
- */
-static inline void sk_prot_clear_nulls(struct sock *sk, int size)
-{
- if (offsetof(struct sock, sk_node.next) != 0)
- memset(sk, 0, offsetof(struct sock, sk_node.next));
- memset(&sk->sk_node.pprev, 0,
- size - offsetof(struct sock, sk_node.pprev));
-}
-
void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
unsigned long nulls1, nulls2;
@@ -1212,32 +1360,16 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
module_put(owner);
}
-#ifdef CONFIG_CGROUPS
-#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk)
-{
- u32 classid;
-
- rcu_read_lock(); /* doing current task, which cannot vanish. */
- classid = task_cls_classid(current);
- rcu_read_unlock();
- if (classid != sk->sk_classid)
- sk->sk_classid = classid;
-}
-EXPORT_SYMBOL(sock_update_classid);
-#endif
-
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
-void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
+void sock_update_netprioidx(struct sock *sk)
{
if (in_interrupt())
return;
- sk->sk_cgrp_prioidx = task_netprioidx(task);
+ sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
-#endif
/**
* sk_alloc - All socket objects are allocated here
@@ -1264,7 +1396,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
atomic_set(&sk->sk_wmem_alloc, 1);
sock_update_classid(sk);
- sock_update_netprioidx(sk, current);
+ sock_update_netprioidx(sk);
}
return sk;
@@ -1489,6 +1621,25 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+void skb_orphan_partial(struct sk_buff *skb)
+{
+ /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
+ * so we do not completely orphan skb, but transfert all
+ * accounted bytes but one, to avoid unexpected reorders.
+ */
+ if (skb->destructor == sock_wfree
+#ifdef CONFIG_INET
+ || skb->destructor == tcp_wfree
+#endif
+ ) {
+ atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
+ skb->truesize = 1;
+ } else {
+ skb_orphan(skb);
+ }
+}
+EXPORT_SYMBOL(skb_orphan_partial);
+
/*
* Read buffer destructor automatically called from kfree_skb.
*/
@@ -1555,22 +1706,6 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
EXPORT_SYMBOL(sock_wmalloc);
/*
- * Allocate a skb from the socket's receive buffer.
- */
-struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
- gfp_t priority)
-{
- if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
- struct sk_buff *skb = alloc_skb(size, priority);
- if (skb) {
- skb_set_owner_r(skb, sk);
- return skb;
- }
- }
- return NULL;
-}
-
-/*
* Allocate a memory block from the socket's option memory buffer.
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
@@ -1635,24 +1770,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
- int *errcode)
+ int *errcode, int max_page_order)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ unsigned long chunk;
gfp_t gfp_mask;
long timeo;
int err;
int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ struct page *page;
+ int i;
err = -EMSGSIZE;
if (npages > MAX_SKB_FRAGS)
goto failure;
- gfp_mask = sk->sk_allocation;
- if (gfp_mask & __GFP_WAIT)
- gfp_mask |= __GFP_REPEAT;
-
timeo = sock_sndtimeo(sk, noblock);
- while (1) {
+ while (!skb) {
err = sock_error(sk);
if (err != 0)
goto failure;
@@ -1661,50 +1795,54 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
- skb = alloc_skb(header_len, gfp_mask);
- if (skb) {
- int i;
-
- /* No pages, we're done... */
- if (!data_len)
- break;
-
- skb->truesize += data_len;
- skb_shinfo(skb)->nr_frags = npages;
- for (i = 0; i < npages; i++) {
- struct page *page;
-
- page = alloc_pages(sk->sk_allocation, 0);
- if (!page) {
- err = -ENOBUFS;
- skb_shinfo(skb)->nr_frags = i;
- kfree_skb(skb);
- goto failure;
- }
-
- __skb_fill_page_desc(skb, i,
- page, 0,
- (data_len >= PAGE_SIZE ?
- PAGE_SIZE :
- data_len));
- data_len -= PAGE_SIZE;
- }
+ if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = -EAGAIN;
+ if (!timeo)
+ goto failure;
+ if (signal_pending(current))
+ goto interrupted;
+ timeo = sock_wait_for_wmem(sk, timeo);
+ continue;
+ }
- /* Full success... */
- break;
- }
- err = -ENOBUFS;
+ err = -ENOBUFS;
+ gfp_mask = sk->sk_allocation;
+ if (gfp_mask & __GFP_WAIT)
+ gfp_mask |= __GFP_REPEAT;
+
+ skb = alloc_skb(header_len, gfp_mask);
+ if (!skb)
goto failure;
+
+ skb->truesize += data_len;
+
+ for (i = 0; npages > 0; i++) {
+ int order = max_page_order;
+
+ while (order) {
+ if (npages >= 1 << order) {
+ page = alloc_pages(sk->sk_allocation |
+ __GFP_COMP |
+ __GFP_NOWARN |
+ __GFP_NORETRY,
+ order);
+ if (page)
+ goto fill_page;
+ }
+ order--;
+ }
+ page = alloc_page(sk->sk_allocation);
+ if (!page)
+ goto failure;
+fill_page:
+ chunk = min_t(unsigned long, data_len,
+ PAGE_SIZE << order);
+ skb_fill_page_desc(skb, i, page, 0, chunk);
+ data_len -= chunk;
+ npages -= 1 << order;
}
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- err = -EAGAIN;
- if (!timeo)
- goto failure;
- if (signal_pending(current))
- goto interrupted;
- timeo = sock_wait_for_wmem(sk, timeo);
}
skb_set_owner_w(skb, sk);
@@ -1713,6 +1851,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
interrupted:
err = sock_intr_errno(timeo);
failure:
+ kfree_skb(skb);
*errcode = err;
return NULL;
}
@@ -1721,14 +1860,24 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
{
- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+ return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
-bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+/**
+ * skb_page_frag_refill - check that a page_frag contains enough room
+ * @sz: minimum size of the fragment we want to get
+ * @pfrag: pointer to page_frag
+ * @prio: priority for memory allocation
+ *
+ * Note: While this allocator tries to use high order pages, there is
+ * no guarantee that allocations succeed. Therefore, @sz MUST be
+ * less or equal than PAGE_SIZE.
+ */
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
{
int order;
@@ -1737,19 +1886,17 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
pfrag->offset = 0;
return true;
}
- if (pfrag->offset < pfrag->size)
+ if (pfrag->offset + sz <= pfrag->size)
return true;
put_page(pfrag->page);
}
- /* We restrict high order allocations to users that can afford to wait */
- order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
-
+ order = SKB_FRAG_PAGE_ORDER;
do {
- gfp_t gfp = sk->sk_allocation;
+ gfp_t gfp = prio;
if (order)
- gfp |= __GFP_COMP | __GFP_NOWARN;
+ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
pfrag->page = alloc_pages(gfp, order);
if (likely(pfrag->page)) {
pfrag->offset = 0;
@@ -1758,6 +1905,15 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
} while (--order >= 0);
+ return false;
+}
+EXPORT_SYMBOL(skb_page_frag_refill);
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+ if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
+ return true;
+
sk_enter_memory_pressure(sk);
sk_stream_moderate_sndbuf(sk);
return false;
@@ -2089,7 +2245,7 @@ static void sock_def_error_report(struct sock *sk)
rcu_read_unlock();
}
-static void sock_def_readable(struct sock *sk, int len)
+static void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
@@ -2148,7 +2304,7 @@ EXPORT_SYMBOL(sk_reset_timer);
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
- if (timer_pending(timer) && del_timer(timer))
+ if (del_timer(timer))
__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
@@ -2206,6 +2362,13 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_stamp = ktime_set(-1L, 0);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ sk->sk_napi_id = 0;
+ sk->sk_ll_usec = sysctl_net_busy_read;
+#endif
+
+ sk->sk_max_pacing_rate = ~0U;
+ sk->sk_pacing_rate = ~0U;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2243,10 +2406,13 @@ void release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
+ /* Warning : release_cb() might need to release sk ownership,
+ * ie call sock_release_ownership(sk) before us.
+ */
if (sk->sk_prot->release_cb)
sk->sk_prot->release_cb(sk);
- sk->sk_lock.owned = 0;
+ sock_release_ownership(sk);
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
spin_unlock_bh(&sk->sk_lock.slock);
@@ -2334,6 +2500,52 @@ void sock_enable_timestamp(struct sock *sk, int flag)
}
}
+int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
+ int level, int type)
+{
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb, *skb2;
+ int copied, err;
+
+ err = -EAGAIN;
+ skb = skb_dequeue(&sk->sk_error_queue);
+ if (skb == NULL)
+ goto out;
+
+ copied = skb->len;
+ if (copied > len) {
+ msg->msg_flags |= MSG_TRUNC;
+ copied = len;
+ }
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (err)
+ goto out_free_skb;
+
+ sock_recv_timestamp(msg, sk, skb);
+
+ serr = SKB_EXT_ERR(skb);
+ put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
+
+ msg->msg_flags |= MSG_ERRQUEUE;
+ err = copied;
+
+ /* Reset and regenerate socket error */
+ spin_lock_bh(&sk->sk_error_queue.lock);
+ sk->sk_err = 0;
+ if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+ sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ sk->sk_error_report(sk);
+ } else
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+
+out_free_skb:
+ kfree_skb(skb);
+out:
+ return err;
+}
+EXPORT_SYMBOL(sock_recv_errqueue);
+
/*
* Get a socket option on an socket.
*
@@ -2754,7 +2966,7 @@ static const struct file_operations proto_seq_fops = {
static __net_init int proto_init_net(struct net *net)
{
- if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
+ if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
return -ENOMEM;
return 0;
@@ -2762,7 +2974,7 @@ static __net_init int proto_init_net(struct net *net)
static __net_exit void proto_exit_net(struct net *net)
{
- proc_net_remove(net, "protocols");
+ remove_proc_entry("protocols", net->proc_net);
}