exynos-linux-stable/net/sched/act_api.c

1106 lines
24 KiB
C
Raw Normal View History

/*
* net/sched/act_api.c Packet action API.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Author: Jamal Hadi Salim
*
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/err.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
#include <net/act_api.h>
#include <net/netlink.h>
static void free_tcf(struct rcu_head *head)
{
struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_qstats);
kfree(p);
}
static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
{
spin_lock_bh(&hinfo->lock);
hlist_del(&p->tcfa_head);
spin_unlock_bh(&hinfo->lock);
gen_kill_estimator(&p->tcfa_bstats,
&p->tcfa_rate_est);
/*
* gen_estimator est_timer() might access p->tcfa_lock
* or bstats, wait a RCU grace period before freeing p
*/
call_rcu(&p->tcfa_rcu, free_tcf);
}
int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
{
int ret = 0;
if (p) {
if (bind)
p->tcfa_bindcnt--;
else if (strict && p->tcfa_bindcnt > 0)
return -EPERM;
p->tcfa_refcnt--;
if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
if (p->ops->cleanup)
p->ops->cleanup(p, bind);
tcf_hash_destroy(p->hinfo, p);
ret = ACT_P_DELETED;
}
}
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-29 23:35:25 +02:00
return ret;
}
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-29 23:35:25 +02:00
EXPORT_SYMBOL(__tcf_hash_release);
static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct nlattr *nest;
spin_lock_bh(&hinfo->lock);
s_i = cb->args[0];
for (i = 0; i < (hinfo->hmask + 1); i++) {
struct hlist_head *head;
struct tc_action *p;
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_rcu(p, head, tcfa_head) {
index++;
if (index < s_i)
continue;
nest = nla_nest_start(skb, n_i);
net sched actions: fix dumping which requires several messages to user space [ Upstream commit 734549eb550c0c720bc89e50501f1b1e98cdd841 ] Fixes a bug in the tcf_dump_walker function that can cause some actions to not be reported when dumping a large number of actions. This issue became more aggrevated when cookies feature was added. In particular this issue is manifest when large cookie values are assigned to the actions and when enough actions are created that the resulting table must be dumped in multiple batches. The number of actions returned in each batch is limited by the total number of actions and the memory buffer size. With small cookies the numeric limit is reached before the buffer size limit, which avoids the code path triggering this bug. When large cookies are used buffer fills before the numeric limit, and the erroneous code path is hit. For example after creating 32 csum actions with the cookie aaaabbbbccccdddd $ tc actions ls action csum total acts 26 action order 0: csum (tcp) action continue index 1 ref 1 bind 0 cookie aaaabbbbccccdddd ..... action order 25: csum (tcp) action continue index 26 ref 1 bind 0 cookie aaaabbbbccccdddd total acts 6 action order 0: csum (tcp) action continue index 28 ref 1 bind 0 cookie aaaabbbbccccdddd ...... action order 5: csum (tcp) action continue index 32 ref 1 bind 0 cookie aaaabbbbccccdddd Note that the action with index 27 is omitted from the report. Fixes: 4b3550ef530c ("[NET_SCHED]: Use nla_nest_start/nla_nest_end")" Signed-off-by: Craig Dillabaugh <cdillaba@mojatatu.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-03-26 14:58:32 -04:00
if (nest == NULL) {
index--;
goto nla_put_failure;
net sched actions: fix dumping which requires several messages to user space [ Upstream commit 734549eb550c0c720bc89e50501f1b1e98cdd841 ] Fixes a bug in the tcf_dump_walker function that can cause some actions to not be reported when dumping a large number of actions. This issue became more aggrevated when cookies feature was added. In particular this issue is manifest when large cookie values are assigned to the actions and when enough actions are created that the resulting table must be dumped in multiple batches. The number of actions returned in each batch is limited by the total number of actions and the memory buffer size. With small cookies the numeric limit is reached before the buffer size limit, which avoids the code path triggering this bug. When large cookies are used buffer fills before the numeric limit, and the erroneous code path is hit. For example after creating 32 csum actions with the cookie aaaabbbbccccdddd $ tc actions ls action csum total acts 26 action order 0: csum (tcp) action continue index 1 ref 1 bind 0 cookie aaaabbbbccccdddd ..... action order 25: csum (tcp) action continue index 26 ref 1 bind 0 cookie aaaabbbbccccdddd total acts 6 action order 0: csum (tcp) action continue index 28 ref 1 bind 0 cookie aaaabbbbccccdddd ...... action order 5: csum (tcp) action continue index 32 ref 1 bind 0 cookie aaaabbbbccccdddd Note that the action with index 27 is omitted from the report. Fixes: 4b3550ef530c ("[NET_SCHED]: Use nla_nest_start/nla_nest_end")" Signed-off-by: Craig Dillabaugh <cdillaba@mojatatu.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-03-26 14:58:32 -04:00
}
err = tcf_action_dump_1(skb, p, 0, 0);
if (err < 0) {
index--;
nlmsg_trim(skb, nest);
goto done;
}
nla_nest_end(skb, nest);
n_i++;
if (n_i >= TCA_ACT_MAX_PRIO)
goto done;
}
}
done:
spin_unlock_bh(&hinfo->lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
nla_put_failure:
nla_nest_cancel(skb, nest);
goto done;
}
static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
const struct tc_action_ops *ops)
{
struct nlattr *nest;
int i = 0, n_i = 0;
int ret = -EINVAL;
nest = nla_nest_start(skb, 0);
if (nest == NULL)
goto nla_put_failure;
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
for (i = 0; i < (hinfo->hmask + 1); i++) {
struct hlist_head *head;
struct hlist_node *n;
struct tc_action *p;
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_safe(p, n, head, tcfa_head) {
ret = __tcf_hash_release(p, false, true);
if (ret == ACT_P_DELETED) {
module_put(ops->owner);
n_i++;
} else if (ret < 0)
goto nla_put_failure;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
goto nla_put_failure;
nla_nest_end(skb, nest);
return n_i;
nla_put_failure:
nla_nest_cancel(skb, nest);
return ret;
}
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
if (type == RTM_DELACTION) {
return tcf_del_walker(hinfo, skb, ops);
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(hinfo, skb, cb);
} else {
WARN(1, "tcf_generic_walker: unknown action %d\n", type);
return -EINVAL;
}
}
EXPORT_SYMBOL(tcf_generic_walker);
static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
{
struct tc_action *p = NULL;
struct hlist_head *head;
spin_lock_bh(&hinfo->lock);
head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
hlist_for_each_entry_rcu(p, head, tcfa_head)
if (p->tcfa_index == index)
break;
spin_unlock_bh(&hinfo->lock);
return p;
}
u32 tcf_hash_new_index(struct tc_action_net *tn)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
u32 val = hinfo->index;
do {
if (++val == 0)
val = 1;
} while (tcf_hash_lookup(val, hinfo));
hinfo->index = val;
return val;
}
EXPORT_SYMBOL(tcf_hash_new_index);
int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
struct tc_action *p = tcf_hash_lookup(index, hinfo);
if (p) {
*a = p;
return 1;
}
return 0;
}
EXPORT_SYMBOL(tcf_hash_search);
bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
int bind)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
struct tc_action *p = NULL;
if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
if (bind)
p->tcfa_bindcnt++;
p->tcfa_refcnt++;
*a = p;
return true;
}
return false;
}
EXPORT_SYMBOL(tcf_hash_check);
void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
{
if (est)
gen_kill_estimator(&a->tcfa_bstats,
&a->tcfa_rate_est);
call_rcu(&a->tcfa_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats)
{
struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
struct tcf_hashinfo *hinfo = tn->hinfo;
int err = -ENOMEM;
if (unlikely(!p))
return -ENOMEM;
p->tcfa_refcnt = 1;
if (bind)
p->tcfa_bindcnt = 1;
if (cpustats) {
p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
if (!p->cpu_bstats) {
err1:
kfree(p);
return err;
}
p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
if (!p->cpu_qstats) {
err2:
free_percpu(p->cpu_bstats);
goto err1;
}
}
spin_lock_init(&p->tcfa_lock);
INIT_HLIST_NODE(&p->tcfa_head);
p->tcfa_index = index ? index : tcf_hash_new_index(tn);
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
&p->tcfa_lock, NULL, est);
if (err) {
free_percpu(p->cpu_qstats);
goto err2;
}
}
p->hinfo = hinfo;
p->ops = ops;
INIT_LIST_HEAD(&p->list);
*a = p;
return 0;
}
EXPORT_SYMBOL(tcf_hash_create);
void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask);
spin_lock_bh(&hinfo->lock);
hlist_add_head(&a->tcfa_head, &hinfo->htab[h]);
spin_unlock_bh(&hinfo->lock);
}
EXPORT_SYMBOL(tcf_hash_insert);
void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
struct tcf_hashinfo *hinfo)
{
int i;
for (i = 0; i < hinfo->hmask + 1; i++) {
struct tc_action *p;
struct hlist_node *n;
hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) {
int ret;
ret = __tcf_hash_release(p, false, true);
if (ret == ACT_P_DELETED)
module_put(ops->owner);
else if (ret < 0)
return;
}
}
kfree(hinfo->htab);
}
EXPORT_SYMBOL(tcf_hashinfo_destroy);
static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
int tcf_register_action(struct tc_action_ops *act,
struct pernet_operations *ops)
{
struct tc_action_ops *a;
int ret;
if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
return -EINVAL;
/* We have to register pernet ops before making the action ops visible,
* otherwise tcf_action_init_1() could get a partially initialized
* netns.
*/
ret = register_pernet_subsys(ops);
if (ret)
return ret;
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
unregister_pernet_subsys(ops);
return -EEXIST;
}
}
list_add_tail(&act->head, &act_base);
write_unlock(&act_mod_lock);
return 0;
}
EXPORT_SYMBOL(tcf_register_action);
int tcf_unregister_action(struct tc_action_ops *act,
struct pernet_operations *ops)
{
struct tc_action_ops *a;
int err = -ENOENT;
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (a == act) {
list_del(&act->head);
err = 0;
break;
}
}
write_unlock(&act_mod_lock);
if (!err)
unregister_pernet_subsys(ops);
return err;
}
EXPORT_SYMBOL(tcf_unregister_action);
/* lookup by name */
static struct tc_action_ops *tc_lookup_action_n(char *kind)
{
struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (strcmp(kind, a->kind) == 0) {
if (try_module_get(a->owner))
res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
return res;
}
/* lookup by nlattr */
static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
{
struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (nla_strcmp(kind, a->kind) == 0) {
if (try_module_get(a->owner))
res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
return res;
}
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res)
{
int ret = -1, i;
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
ret = TC_ACT_OK;
goto exec_done;
}
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
repeat:
ret = a->ops->act(skb, a, res);
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
goto exec_done;
}
exec_done:
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
int tcf_action_destroy(struct list_head *actions, int bind)
{
const struct tc_action_ops *ops;
struct tc_action *a, *tmp;
int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
ops = a->ops;
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-29 23:35:25 +02:00
ret = __tcf_hash_release(a, bind, true);
if (ret == ACT_P_DELETED)
module_put(ops->owner);
else if (ret < 0)
return ret;
}
return ret;
}
int
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
return a->ops->dump(skb, a, bind, ref);
}
int
tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
int err = -EINVAL;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
goto nla_put_failure;
if (tcf_action_copy_stats(skb, a, 0))
goto nla_put_failure;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
err = tcf_action_dump_old(skb, a, bind, ref);
if (err > 0) {
nla_nest_end(skb, nest);
return err;
}
nla_put_failure:
nlmsg_trim(skb, b);
return -1;
}
EXPORT_SYMBOL(tcf_action_dump_1);
int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
struct nlattr *nest;
list_for_each_entry(a, actions, list) {
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
goto nla_put_failure;
err = tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
goto errout;
nla_nest_end(skb, nest);
}
return 0;
nla_put_failure:
err = -EINVAL;
errout:
nla_nest_cancel(skb, nest);
return err;
}
struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
struct nlattr *est, char *name, int ovr,
int bind)
{
struct tc_action *a;
struct tc_action_ops *a_o;
char act_name[IFNAMSIZ];
struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
int err;
if (name == NULL) {
err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
if (kind == NULL)
goto err_out;
if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
goto err_out;
} else {
err = -EINVAL;
if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
goto err_out;
}
a_o = tc_lookup_action_n(act_name);
if (a_o == NULL) {
#ifdef CONFIG_MODULES
rtnl_unlock();
request_module("act_%s", act_name);
rtnl_lock();
a_o = tc_lookup_action_n(act_name);
/* We dropped the RTNL semaphore in order to
* perform the module load. So, even if we
* succeeded in loading the module we have to
* tell the caller to replay the request. We
* indicate this using -EAGAIN.
*/
if (a_o != NULL) {
err = -EAGAIN;
goto err_mod;
}
#endif
err = -ENOENT;
goto err_out;
}
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
else
err = a_o->init(net, nla, est, &a, ovr, bind);
if (err < 0)
goto err_mod;
/* module count goes up only when brand new policy is created
* if it exists and is only bound to in a_o->init() then
* ACT_P_CREATED is not returned (a zero is).
*/
if (err != ACT_P_CREATED)
module_put(a_o->owner);
return a;
err_mod:
module_put(a_o->owner);
err_out:
return ERR_PTR(err);
}
static void cleanup_a(struct list_head *actions, int ovr)
{
struct tc_action *a;
if (!ovr)
return;
list_for_each_entry(a, actions, list)
a->tcfa_refcnt--;
}
int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind, struct list_head *actions)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
int err;
int i;
err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (err < 0)
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
}
act->order = i;
if (ovr)
act->tcfa_refcnt++;
list_add_tail(&act->list, actions);
}
/* Remove the temp refcnt which was necessary to protect against
* destroying an existing action which was being replaced
*/
cleanup_a(actions, ovr);
return 0;
err:
tcf_action_destroy(actions, bind);
return err;
}
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
int compat_mode)
{
int err = 0;
struct gnet_dump d;
if (p == NULL)
goto errout;
/* compat_mode being true specifies a call that is supposed
* to add additional backward compatibility statistic TLVs.
*/
if (compat_mode) {
if (p->type == TCA_OLD_COMPAT)
err = gnet_stats_start_copy_compat(skb, 0,
TCA_STATS,
TCA_XSTATS,
&p->tcfa_lock, &d,
TCA_PAD);
else
return 0;
} else
err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
&p->tcfa_lock, &d, TCA_ACT_PAD);
if (err < 0)
goto errout;
if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfa_bstats,
&p->tcfa_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfa_qstats,
p->tcfa_qstats.qlen) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
goto errout;
return 0;
errout:
return -1;
}
static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
u32 portid, u32 seq, u16 flags, int event, int bind,
int ref)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags);
if (!nlh)
goto out_nlmsg_trim;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
goto out_nlmsg_trim;
if (tcf_action_dump(skb, actions, bind, ref) < 0)
goto out_nlmsg_trim;
nla_nest_end(skb, nest);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
out_nlmsg_trim:
nlmsg_trim(skb, b);
return -1;
}
static int
act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
struct list_head *actions, int event)
{
struct sk_buff *skb;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
return rtnl_unicast(skb, net, portid);
}
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
struct tc_action *a;
int index;
int err;
err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
if (tb[TCA_ACT_INDEX] == NULL ||
nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
goto err_out;
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -EINVAL;
ops = tc_lookup_action(tb[TCA_ACT_KIND]);
if (!ops) /* could happen in batch of actions */
goto err_out;
err = -ENOENT;
if (ops->lookup(net, &a, index) == 0)
goto err_mod;
module_put(ops->owner);
return a;
err_mod:
module_put(ops->owner);
err_out:
return ERR_PTR(err);
}
static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
struct sk_buff *skb;
unsigned char *b;
struct nlmsghdr *nlh;
struct tcamsg *t;
struct netlink_callback dcb;
struct nlattr *nest;
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
struct nlattr *kind;
int err = -ENOMEM;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
pr_debug("tca_action_flush: failed skb alloc\n");
return err;
}
b = skb_tail_pointer(skb);
err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
ops = tc_lookup_action(kind);
if (!ops) /*some idjot trying to flush unknown action */
goto err_out;
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
sizeof(*t), 0);
if (!nlh)
goto out_module_put;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
goto out_module_put;
err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
net sched actions: decrement module reference count after table flush. [ Upstream commit edb9d1bff4bbe19b8ae0e71b1f38732591a9eeb2 ] When tc actions are loaded as a module and no actions have been installed, flushing them would result in actions removed from the memory, but modules reference count not being decremented, so that the modules would not be unloaded. Following is example with GACT action: % sudo modprobe act_gact % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions ls action gact % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 1 % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 2 % sudo rmmod act_gact rmmod: ERROR: Module act_gact is in use .... After the fix: % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions add action pass index 1 % sudo tc actions add action pass index 2 % sudo tc actions add action pass index 3 % lsmod Module Size Used by act_gact 16384 3 % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 0 % % sudo tc actions flush action gact % lsmod Module Size Used by act_gact 16384 0 % sudo rmmod act_gact % lsmod Module Size Used by % Fixes: f97017cdefef ("net-sched: Fix actions flushing") Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-02-24 11:00:32 -05:00
if (err <= 0)
goto out_module_put;
nla_nest_end(skb, nest);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
nlh->nlmsg_flags |= NLM_F_ROOT;
module_put(ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
return err;
out_module_put:
module_put(ops->owner);
err_out:
kfree_skb(skb);
return err;
}
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
u32 portid)
{
int ret;
struct sk_buff *skb;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 1) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
/* now do the delete */
ret = tcf_action_destroy(actions, 0);
if (ret < 0) {
kfree_skb(skb);
return ret;
}
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (ret > 0)
return 0;
return ret;
}
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
u32 portid, int event)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
LIST_HEAD(actions);
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (ret < 0)
return ret;
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
if (tb[1] != NULL)
return tca_action_flush(net, tb[1], n, portid);
else
return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_get_1(net, tb[i], n, portid);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
}
act->order = i;
list_add_tail(&act->list, &actions);
}
if (event == RTM_GETACTION)
ret = act_get_notify(net, portid, n, &actions, event);
else { /* delete */
ret = tcf_del_notify(net, n, &actions, portid);
if (ret)
goto err;
return ret;
}
err:
net sched actions: fix refcnt when GETing of action after bind [ Upstream commit 0faa9cb5b3836a979864a6357e01d2046884ad52 ] Demonstrating the issue: .. add a drop action $sudo $TC actions add action drop index 10 .. retrieve it $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 0 installed 29 sec used 29 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... bug 1 above: reference is two. Reference is actually 1 but we forget to subtract 1. ... do a GET again and we see the same issue try a few times and nothing changes ~$ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 0 installed 31 sec used 31 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... lets try to bind the action to a filter.. $ sudo $TC qdisc add dev lo ingress $ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 ... and now a few GETs: $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 3 bind 1 installed 204 sec used 204 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 4 bind 1 installed 206 sec used 206 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 5 bind 1 installed 235 sec used 235 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 .... as can be observed the reference count keeps going up. After the fix $ sudo $TC actions add action drop index 10 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 1 bind 0 installed 4 sec used 4 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 1 bind 0 installed 6 sec used 6 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC qdisc add dev lo ingress $ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 1 installed 32 sec used 32 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 1 installed 33 sec used 33 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Fixes: aecc5cefc389 ("net sched actions: fix GETing actions") Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-01-15 10:14:06 -05:00
if (event != RTM_GETACTION)
tcf_action_destroy(&actions, 0);
return ret;
}
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
u32 portid)
{
struct sk_buff *skb;
int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
err = 0;
return err;
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid, int ovr)
{
net: avoid potential infinite loop in tc_ctl_action() [ Upstream commit 39f13ea2f61b439ebe0060393e9c39925c9ee28c ] tc_ctl_action() has the ability to loop forever if tcf_action_add() returns -EAGAIN. This special case has been done in case a module needed to be loaded, but it turns out that tcf_add_notify() could also return -EAGAIN if the socket sk_rcvbuf limit is hit. We need to separate the two cases, and only loop for the module loading case. While we are at it, add a limit of 10 attempts since unbounded loops are always scary. syzbot repro was something like : socket(PF_NETLINK, SOCK_RAW|SOCK_NONBLOCK, NETLINK_ROUTE) = 3 write(3, ..., 38) = 38 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [0], 4) = 0 sendmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{..., 388}], msg_controllen=0, msg_flags=0x10}, ...) NMI backtrace for cpu 0 CPU: 0 PID: 1054 Comm: khungtaskd Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x70/0xb2 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x23b/0x28b lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_all_cpu_backtrace include/linux/nmi.h:146 [inline] check_hung_uninterruptible_tasks kernel/hung_task.c:205 [inline] watchdog+0x9d0/0xef0 kernel/hung_task.c:289 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 8859 Comm: syz-executor910 Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:arch_local_save_flags arch/x86/include/asm/paravirt.h:751 [inline] RIP: 0010:lockdep_hardirqs_off+0x1df/0x2e0 kernel/locking/lockdep.c:3453 Code: 5c 08 00 00 5b 41 5c 41 5d 5d c3 48 c7 c0 58 1d f3 88 48 ba 00 00 00 00 00 fc ff df 48 c1 e8 03 80 3c 10 00 0f 85 d3 00 00 00 <48> 83 3d 21 9e 99 07 00 0f 84 b9 00 00 00 9c 58 0f 1f 44 00 00 f6 RSP: 0018:ffff8880a6f3f1b8 EFLAGS: 00000046 RAX: 1ffffffff11e63ab RBX: ffff88808c9c6080 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000000 RDI: ffff88808c9c6914 RBP: ffff8880a6f3f1d0 R08: ffff88808c9c6080 R09: fffffbfff16be5d1 R10: fffffbfff16be5d0 R11: 0000000000000003 R12: ffffffff8746591f R13: ffff88808c9c6080 R14: ffffffff8746591f R15: 0000000000000003 FS: 00000000011e4880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff600400 CR3: 00000000a8920000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: trace_hardirqs_off+0x62/0x240 kernel/trace/trace_preemptirq.c:45 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] _raw_spin_lock_irqsave+0x6f/0xcd kernel/locking/spinlock.c:159 __wake_up_common_lock+0xc8/0x150 kernel/sched/wait.c:122 __wake_up+0xe/0x10 kernel/sched/wait.c:142 netlink_unlock_table net/netlink/af_netlink.c:466 [inline] netlink_unlock_table net/netlink/af_netlink.c:463 [inline] netlink_broadcast_filtered+0x705/0xb80 net/netlink/af_netlink.c:1514 netlink_broadcast+0x3a/0x50 net/netlink/af_netlink.c:1534 rtnetlink_send+0xdd/0x110 net/core/rtnetlink.c:714 tcf_add_notify net/sched/act_api.c:1343 [inline] tcf_action_add+0x243/0x370 net/sched/act_api.c:1362 tc_ctl_action+0x3b5/0x4bc net/sched/act_api.c:1410 rtnetlink_rcv_msg+0x463/0xb00 net/core/rtnetlink.c:5386 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5404 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x531/0x710 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x8a5/0xd60 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x803/0x920 net/socket.c:2311 __sys_sendmsg+0x105/0x1d0 net/socket.c:2356 __do_sys_sendmsg net/socket.c:2365 [inline] __se_sys_sendmsg net/socket.c:2363 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2363 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440939 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot+cf0adbb9c28c8866c788@syzkaller.appspotmail.com Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-10-14 11:22:30 -07:00
int loop, ret;
LIST_HEAD(actions);
net: avoid potential infinite loop in tc_ctl_action() [ Upstream commit 39f13ea2f61b439ebe0060393e9c39925c9ee28c ] tc_ctl_action() has the ability to loop forever if tcf_action_add() returns -EAGAIN. This special case has been done in case a module needed to be loaded, but it turns out that tcf_add_notify() could also return -EAGAIN if the socket sk_rcvbuf limit is hit. We need to separate the two cases, and only loop for the module loading case. While we are at it, add a limit of 10 attempts since unbounded loops are always scary. syzbot repro was something like : socket(PF_NETLINK, SOCK_RAW|SOCK_NONBLOCK, NETLINK_ROUTE) = 3 write(3, ..., 38) = 38 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [0], 4) = 0 sendmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{..., 388}], msg_controllen=0, msg_flags=0x10}, ...) NMI backtrace for cpu 0 CPU: 0 PID: 1054 Comm: khungtaskd Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x70/0xb2 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x23b/0x28b lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_all_cpu_backtrace include/linux/nmi.h:146 [inline] check_hung_uninterruptible_tasks kernel/hung_task.c:205 [inline] watchdog+0x9d0/0xef0 kernel/hung_task.c:289 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 8859 Comm: syz-executor910 Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:arch_local_save_flags arch/x86/include/asm/paravirt.h:751 [inline] RIP: 0010:lockdep_hardirqs_off+0x1df/0x2e0 kernel/locking/lockdep.c:3453 Code: 5c 08 00 00 5b 41 5c 41 5d 5d c3 48 c7 c0 58 1d f3 88 48 ba 00 00 00 00 00 fc ff df 48 c1 e8 03 80 3c 10 00 0f 85 d3 00 00 00 <48> 83 3d 21 9e 99 07 00 0f 84 b9 00 00 00 9c 58 0f 1f 44 00 00 f6 RSP: 0018:ffff8880a6f3f1b8 EFLAGS: 00000046 RAX: 1ffffffff11e63ab RBX: ffff88808c9c6080 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000000 RDI: ffff88808c9c6914 RBP: ffff8880a6f3f1d0 R08: ffff88808c9c6080 R09: fffffbfff16be5d1 R10: fffffbfff16be5d0 R11: 0000000000000003 R12: ffffffff8746591f R13: ffff88808c9c6080 R14: ffffffff8746591f R15: 0000000000000003 FS: 00000000011e4880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff600400 CR3: 00000000a8920000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: trace_hardirqs_off+0x62/0x240 kernel/trace/trace_preemptirq.c:45 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] _raw_spin_lock_irqsave+0x6f/0xcd kernel/locking/spinlock.c:159 __wake_up_common_lock+0xc8/0x150 kernel/sched/wait.c:122 __wake_up+0xe/0x10 kernel/sched/wait.c:142 netlink_unlock_table net/netlink/af_netlink.c:466 [inline] netlink_unlock_table net/netlink/af_netlink.c:463 [inline] netlink_broadcast_filtered+0x705/0xb80 net/netlink/af_netlink.c:1514 netlink_broadcast+0x3a/0x50 net/netlink/af_netlink.c:1534 rtnetlink_send+0xdd/0x110 net/core/rtnetlink.c:714 tcf_add_notify net/sched/act_api.c:1343 [inline] tcf_action_add+0x243/0x370 net/sched/act_api.c:1362 tc_ctl_action+0x3b5/0x4bc net/sched/act_api.c:1410 rtnetlink_rcv_msg+0x463/0xb00 net/core/rtnetlink.c:5386 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5404 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x531/0x710 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x8a5/0xd60 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x803/0x920 net/socket.c:2311 __sys_sendmsg+0x105/0x1d0 net/socket.c:2356 __do_sys_sendmsg net/socket.c:2365 [inline] __se_sys_sendmsg net/socket.c:2363 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2363 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440939 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot+cf0adbb9c28c8866c788@syzkaller.appspotmail.com Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-10-14 11:22:30 -07:00
for (loop = 0; loop < 10; loop++) {
ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
if (ret != -EAGAIN)
break;
}
if (ret)
return ret;
return tcf_add_notify(net, n, &actions, portid);
}
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ACT_MAX + 1];
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
if ((n->nlmsg_type != RTM_GETACTION) &&
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
if (ret < 0)
return ret;
if (tca[TCA_ACT_TAB] == NULL) {
pr_notice("tc_ctl_action: received NO action attribs\n");
return -EINVAL;
}
/* n->nlmsg_flags & NLM_F_CREATE */
switch (n->nlmsg_type) {
case RTM_NEWACTION:
/* we are going to assume all other flags
* imply create only if it doesn't exist
* Note that CREATE | EXCL implies that
* but since we want avoid ambiguity (eg when flags
* is zero) then just set this
*/
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
portid, RTM_DELACTION);
break;
case RTM_GETACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
portid, RTM_GETACTION);
break;
default:
BUG();
}
return ret;
}
static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
{
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
return NULL;
if (tb[1] == NULL)
return NULL;
if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0)
return NULL;
kind = tb2[TCA_ACT_KIND];
return kind;
}
static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
struct tc_action_ops *a_o;
int ret = 0;
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
struct nlattr *kind = find_dump_kind(cb->nlh);
if (kind == NULL) {
pr_info("tc_dump_action: action bad kind\n");
return 0;
}
a_o = tc_lookup_action(kind);
if (a_o == NULL)
return 0;
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
goto out_module_put;
ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
if (ret < 0)
goto out_module_put;
if (ret > 0) {
nla_nest_end(skb, nest);
ret = skb->len;
} else
nlmsg_trim(skb, b);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
if (NETLINK_CB(cb->skb).portid && ret)
nlh->nlmsg_flags |= NLM_F_MULTI;
module_put(a_o->owner);
return skb->len;
out_module_put:
module_put(a_o->owner);
nlmsg_trim(skb, b);
return skb->len;
}
static int __init tc_action_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
NULL);
return 0;
}
subsys_initcall(tc_action_init);