#ifndef _AF_NETLINK_H
#define _AF_NETLINK_H

#include <linux/rhashtable.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <net/sock.h>
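
/* Multicast group membership is tracked as a bitmap with one bit per
 * group.  NLGRPSZ() rounds a group count up to whole unsigned longs and
 * yields the bitmap size in bytes; NLGRPLONGS() converts that size into
 * a count of unsigned long words.
 */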
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))

struct netlink_sock {
	/* struct sock has to be the first member of netlink_sock */
	struct sock		sk;
	u32			portid;
	u32			dst_portid;
	u32			dst_group;
	u32			flags;
	u32			subscriptions;
	u32			ngroups;
	unsigned long		*groups;
	unsigned long		state;
	size_t			max_recvmsg_len;
	wait_queue_head_t	wait;
	/* Set, after an smp_wmb(), only once the socket is hashed and
	 * nlk->portid is valid, i.e. the socket has been bound either
	 * explicitly or by autobind.  Lockless readers must load bound
	 * once and issue smp_rmb() before trusting nlk->portid (see the
	 * sketch below the struct).
	 */
	bool			bound;
	bool			cb_running;
	/* Carries the ->dump() return value across recvmsg() calls: if
	 * ->dump() fills the skb so full that the trailing NLMSG_DONE no
	 * longer fits, the status is parked here and NLMSG_DONE is sent
	 * in a fresh skb on the next read instead of being dropped (see
	 * the sketch below nlk_sk()).
	 */
	int			dump_done_errno;
	struct netlink_callback	cb;
	struct mutex		*cb_mutex;
	struct mutex		cb_def_mutex;
	void			(*netlink_rcv)(struct sk_buff *skb);
	int			(*netlink_bind)(struct net *net, int group);
	void			(*netlink_unbind)(struct net *net, int group);
	struct module		*module;

	struct rhash_head	node;
	struct rcu_head		rcu;
	struct work_struct	work;
};
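
/*
 * Illustrative sketch, not part of the original header: how the writer
 * and a lockless reader are expected to pair barriers around "bound".
 * Both helper names are hypothetical; the real logic is open-coded in
 * netlink_insert() and netlink_bind() in af_netlink.c.
 */
static inline void nlk_publish_bound(struct netlink_sock *nlk, u32 portid)
{
	nlk->portid = portid;
	/* Make the hashed socket and valid portid visible first. */
	smp_wmb();
	nlk->bound = true;
}

static inline bool nlk_read_bound(struct netlink_sock *nlk, u32 *portid)
{
	/* Read bound only once so a concurrent bind can't be half-seen. */
	bool bound = nlk->bound;

	if (bound) {
		/* Pairs with the smp_wmb() in nlk_publish_bound(). */
		smp_rmb();
		*portid = nlk->portid;
	}
	return bound;
}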
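
/* Recover the containing netlink_sock from a generic struct sock;
 * callers rely on sk being the first member of struct netlink_sock.
 */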
static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
	return container_of(sk, struct netlink_sock, sk);
}
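
/*
 * Illustrative sketch, not part of the original header: the decision
 * netlink_dump() makes with dump_done_errno after ->dump() returns.
 * The helper name is hypothetical; skb_tailroom() and
 * nlmsg_total_size() are the real helpers (linux/skbuff.h and
 * net/netlink.h, which the includer is assumed to provide).
 */
static inline bool nlk_defer_done(const struct netlink_sock *nlk,
				  const struct sk_buff *skb)
{
	/* A positive dump_done_errno means ->dump() has more records;
	 * zero or negative means the dump finished with that status and
	 * an NLMSG_DONE carrying it must still be delivered.  Defer the
	 * DONE message to a fresh skb when more data is pending or this
	 * skb has no tailroom left for it.
	 */
	return nlk->dump_done_errno > 0 ||
	       skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno));
}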
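
/* Per-protocol state: one netlink_table exists for each netlink protocol
 * family, holding its portid hash table, multicast state and
 * registration hooks.
 */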
struct netlink_table {
	struct rhashtable	hash;
	struct hlist_head	mc_list;
	struct listeners __rcu	*listeners;
	unsigned int		flags;
	unsigned int		groups;
	struct mutex		*cb_mutex;
	struct module		*module;
	int			(*bind)(struct net *net, int group);
	void			(*unbind)(struct net *net, int group);
	bool			(*compare)(struct net *net, struct sock *sock);
	int			registered;
};

extern struct netlink_table *nl_table;
extern rwlock_t nl_table_lock;

#endif