Revert "BACKPORT: bpf: multi program support for cgroup+bpf"
This reverts commit 148f111e98.
parent dd475f3220
commit 4fce632291

8 changed files with 142 additions and 501 deletions
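For context, the API being removed: with the multi-prog backport in place, user space could pass BPF_F_ALLOW_MULTI to BPF_PROG_ATTACH; after this revert only the BPF_F_ALLOW_OVERRIDE bit remains valid. A minimal user-space sketch of the attach call (the cgroup_fd and prog_fd descriptors are hypothetical; prog_fd is assumed to come from a successful BPF_PROG_LOAD of a BPF_PROG_TYPE_CGROUP_SKB program):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int cgroup_attach(int cgroup_fd, int prog_fd, __u32 flags)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = cgroup_fd;	/* cgroup directory fd */
	attr.attach_bpf_fd = prog_fd;	/* loaded BPF program fd */
	attr.attach_type = BPF_CGROUP_INET_EGRESS;
	attr.attach_flags = flags;	/* BPF_F_ALLOW_MULTI is rejected after this revert */

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}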
@@ -13,42 +13,27 @@ struct sk_buff;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-struct bpf_prog_list {
-	struct list_head node;
-	struct bpf_prog *prog;
-};
-
-struct bpf_prog_array;
-
 struct cgroup_bpf {
-	/* array of effective progs in this cgroup */
-	struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
-
-	/* attached progs to this cgroup and attach flags
-	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
-	 * have either zero or one element
-	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
+	/*
+	 * Store two sets of bpf_prog pointers, one for programs that are
+	 * pinned directly to this cgroup, and one for those that are effective
+	 * when this cgroup is accessed.
 	 */
-	struct list_head progs[MAX_BPF_ATTACH_TYPE];
-	u32 flags[MAX_BPF_ATTACH_TYPE];
-
-	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array __rcu *inactive;
+	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+	struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+	bool disallow_override[MAX_BPF_ATTACH_TYPE];
 };
 
 void cgroup_bpf_put(struct cgroup *cgrp);
-int cgroup_bpf_inherit(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
 
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool overridable);
 
-/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable);
 
 int __cgroup_bpf_run_filter(struct sock *sk,
 			    struct sk_buff *skb,
@@ -81,7 +66,8 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
-static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+				      struct cgroup *parent) {}
 
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
@@ -241,38 +241,6 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
 
-/* an array of programs to be executed under rcu_lock.
- *
- * Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
- *
- * the structure returned by bpf_prog_array_alloc() should be populated
- * with program pointers and the last pointer must be NULL.
- * The user has to keep refcnt on the program and make sure the program
- * is removed from the array before bpf_prog_put().
- * The 'struct bpf_prog_array *' should only be replaced with xchg()
- * since other cpus are walking the array of pointers in parallel.
- */
-struct bpf_prog_array {
-	struct rcu_head rcu;
-	struct bpf_prog *progs[0];
-};
-
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
-
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
-	({						\
-		struct bpf_prog **_prog;		\
-		u32 _ret = 1;				\
-		rcu_read_lock();			\
-		_prog = rcu_dereference(array)->progs;	\
-		for (; *_prog; _prog++)			\
-			_ret &= func(*_prog, ctx);	\
-		rcu_read_unlock();			\
-		_ret;					\
-	 })
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
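The macro removed above ANDs together the return codes of every program in the NULL-terminated array, so a single 0 (drop) verdict overrides any number of 1 (allow) verdicts. A minimal sketch of that combining rule outside the kernel, with a function-pointer stub standing in for bpf_prog and BPF_PROG_RUN:

typedef unsigned int (*stub_prog_t)(void *ctx);

/* Mirror of the removed BPF_PROG_RUN_ARRAY loop: start from 1 (allow)
 * and AND in each program's verdict; any 0 makes the final result 0.
 */
static unsigned int run_array(stub_prog_t *progs, void *ctx)
{
	unsigned int ret = 1;

	for (; *progs; progs++)		/* array is NULL-terminated */
		ret &= (*progs)(ctx);
	return ret;
}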
@@ -423,7 +423,7 @@ struct sk_filter {
 	struct bpf_prog	*prog;
 };
 
-#define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
+#define BPF_PROG_RUN(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
@@ -109,47 +109,11 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
- *
- * NONE(default): No further bpf programs allowed in the subtree.
- *
- * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
- * the program in this cgroup yields to sub-cgroup program.
- *
- * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
- * that cgroup program gets run in addition to the program in this cgroup.
- *
- * Only one program is allowed to be attached to a cgroup with
- * NONE or BPF_F_ALLOW_OVERRIDE flag.
- * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
- * release old program and attach the new one. Attach flags has to match.
- *
- * Multiple programs are allowed to be attached to a cgroup with
- * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
- * (those that were attached first, run first)
- * The programs of sub-cgroup are executed first, then programs of
- * this cgroup and then programs of parent cgroup.
- * When children program makes decision (like picking TCP CA or sock bind)
- * parent program has a chance to override it.
- *
- * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
- * A cgroup with NONE doesn't allow any programs in sub-cgroups.
- * Ex1:
- * cgrp1 (MULTI progs A, B) ->
- *    cgrp2 (OVERRIDE prog C) ->
- *      cgrp3 (MULTI prog D) ->
- *        cgrp4 (OVERRIDE prog E) ->
- *          cgrp5 (NONE prog F)
- * the event in cgrp5 triggers execution of F,D,A,B in that order.
- * if prog F is detached, the execution is E,D,A,B
- * if prog F and D are detached, the execution is E,A,B
- * if prog F, E and D are detached, the execution is C,A,B
- *
- * All eligible programs are executed regardless of return code from
- * earlier programs.
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
-#define BPF_F_ALLOW_MULTI	(1U << 1)
 
 #define BPF_PSEUDO_MAP_FD	1
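The Ex1 ordering in the comment removed above follows from one rule: walk from the cgroup where the event fired up to the root, taking a level's programs only while the chain is still empty or that level was attached with BPF_F_ALLOW_MULTI. A small self-contained sketch of that rule (illustrative types, not kernel API):

struct fake_cgroup {
	struct fake_cgroup *parent;
	const char *progs;	/* program names attached at this level, e.g. "AB" */
	int multi;		/* attached with BPF_F_ALLOW_MULTI? */
};

static void effective_chain(struct fake_cgroup *cg, char *out)
{
	unsigned int cnt = 0;

	for (; cg; cg = cg->parent)
		if (cnt == 0 || cg->multi)	/* same test as the multi-prog kernel code */
			for (const char *p = cg->progs; *p; p++)
				out[cnt++] = *p;
	out[cnt] = '\0';
}

/* For Ex1 above, starting at cgrp5 this yields "FDAB": F from cgrp5,
 * D from cgrp3 (MULTI), A and B from cgrp1 (MULTI); the OVERRIDE levels
 * cgrp4 and cgrp2 are skipped because the chain is no longer empty.
 */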
@@ -27,361 +27,129 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 {
 	unsigned int type;
 
-	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
-		struct list_head *progs = &cgrp->bpf.progs[type];
-		struct bpf_prog_list *pl, *tmp;
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+		struct bpf_prog *prog = cgrp->bpf.prog[type];
 
-		list_for_each_entry_safe(pl, tmp, progs, node) {
-			list_del(&pl->node);
-			bpf_prog_put(pl->prog);
-			kfree(pl);
+		if (prog) {
+			bpf_prog_put(prog);
+			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
-		bpf_prog_array_free(cgrp->bpf.effective[type]);
 	}
 }
 
-/* count number of elements in the list.
- * it's slow but the list cannot be long
- */
-static u32 prog_list_length(struct list_head *head)
-{
-	struct bpf_prog_list *pl;
-	u32 cnt = 0;
-
-	list_for_each_entry(pl, head, node) {
-		if (!pl->prog)
-			continue;
-		cnt++;
-	}
-	return cnt;
-}
-
-/* if parent has non-overridable prog attached,
- * disallow attaching new programs to the descendent cgroup.
- * if parent has overridable or multi-prog, allow attaching
- */
-static bool hierarchy_allows_attach(struct cgroup *cgrp,
-				    enum bpf_attach_type type,
-				    u32 new_flags)
-{
-	struct cgroup *p;
-
-	p = cgroup_parent(cgrp);
-	if (!p)
-		return true;
-	do {
-		u32 flags = p->bpf.flags[type];
-		u32 cnt;
-
-		if (flags & BPF_F_ALLOW_MULTI)
-			return true;
-		cnt = prog_list_length(&p->bpf.progs[type]);
-		WARN_ON_ONCE(cnt > 1);
-		if (cnt == 1)
-			return !!(flags & BPF_F_ALLOW_OVERRIDE);
-		p = cgroup_parent(p);
-	} while (p);
-	return true;
-}
-
-/* compute a chain of effective programs for a given cgroup:
- * start from the list of programs in this cgroup and add
- * all parent programs.
- * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
- * to programs in this cgroup
- */
-static int compute_effective_progs(struct cgroup *cgrp,
-				   enum bpf_attach_type type,
-				   struct bpf_prog_array __rcu **array)
-{
-	struct bpf_prog_array __rcu *progs;
-	struct bpf_prog_list *pl;
-	struct cgroup *p = cgrp;
-	int cnt = 0;
-
-	/* count number of effective programs by walking parents */
-	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			cnt += prog_list_length(&p->bpf.progs[type]);
-		p = cgroup_parent(p);
-	} while (p);
-
-	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
-	if (!progs)
-		return -ENOMEM;
-
-	/* populate the array with effective progs */
-	cnt = 0;
-	p = cgrp;
-	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			list_for_each_entry(pl,
-					    &p->bpf.progs[type], node) {
-				if (!pl->prog)
-					continue;
-				rcu_dereference_protected(progs, 1)->
-					progs[cnt++] = pl->prog;
-			}
-		p = cgroup_parent(p);
-	} while (p);
-
-	*array = progs;
-	return 0;
-}
-
-static void activate_effective_progs(struct cgroup *cgrp,
-				     enum bpf_attach_type type,
-				     struct bpf_prog_array __rcu *array)
-{
-	struct bpf_prog_array __rcu *old_array;
-
-	old_array = xchg(&cgrp->bpf.effective[type], array);
-	/* free prog array after grace period, since __cgroup_bpf_run_*()
-	 * might be still walking the array
-	 */
-	bpf_prog_array_free(old_array);
-}
-
 /**
  * cgroup_bpf_inherit() - inherit effective programs from parent
  * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
  */
-int cgroup_bpf_inherit(struct cgroup *cgrp)
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
 {
-/* has to use marco instead of const int, since compiler thinks
- * that array below is variable length
- */
-#define	NR ARRAY_SIZE(cgrp->bpf.effective)
-	struct bpf_prog_array __rcu *arrays[NR] = {};
-	int i;
+	unsigned int type;
 
-	for (i = 0; i < NR; i++)
-		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+		struct bpf_prog *e;
 
-	for (i = 0; i < NR; i++)
-		if (compute_effective_progs(cgrp, i, &arrays[i]))
-			goto cleanup;
-
-	for (i = 0; i < NR; i++)
-		activate_effective_progs(cgrp, i, arrays[i]);
-
-	return 0;
-cleanup:
-	for (i = 0; i < NR; i++)
-		bpf_prog_array_free(arrays[i]);
-	return -ENOMEM;
+		e = rcu_dereference_protected(parent->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+		rcu_assign_pointer(cgrp->bpf.effective[type], e);
+		cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
+	}
 }
 
-#define BPF_CGROUP_MAX_PROGS 64
-
 /**
- * __cgroup_bpf_attach() - Attach the program to a cgroup, and
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
  *                         propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
- * @prog: A program to attach
- * @type: Type of attach operation
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
  *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and which is effective for execution.
+ *
+ * If @prog is not %NULL, this function attaches a new program to the cgroup
+ * and releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
+ *
 * Must be called with cgroup_mutex held.
 */
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags)
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool new_overridable)
 {
-	struct list_head *progs = &cgrp->bpf.progs[type];
-	struct bpf_prog *old_prog = NULL;
-	struct cgroup_subsys_state *css;
-	struct bpf_prog_list *pl;
-	bool pl_was_allocated;
-	u32 old_flags;
-	int err;
+	struct bpf_prog *old_prog, *effective = NULL;
+	struct cgroup_subsys_state *pos;
+	bool overridable = true;
 
-	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
-		/* invalid combination */
-		return -EINVAL;
+	if (parent) {
+		overridable = !parent->bpf.disallow_override[type];
+		effective = rcu_dereference_protected(parent->bpf.effective[type],
+						      lockdep_is_held(&cgroup_mutex));
+	}
 
-	if (!hierarchy_allows_attach(cgrp, type, flags))
-		return -EPERM;
-
-	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
-		/* Disallow attaching non-overridable on top
-		 * of existing overridable in this cgroup.
-		 * Disallow attaching multi-prog if overridable or none
+	if (prog && effective && !overridable)
+		/* if parent has non-overridable prog attached, disallow
+		 * attaching new programs to descendent cgroup
 		 */
 		return -EPERM;
 
-	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
-		return -E2BIG;
+	if (prog && effective && overridable != new_overridable)
+		/* if parent has overridable prog attached, only
+		 * allow overridable programs in descendent cgroup
+		 */
+		return -EPERM;
 
-	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node)
-			if (pl->prog == prog)
-				/* disallow attaching the same prog twice */
-				return -EINVAL;
+	old_prog = cgrp->bpf.prog[type];
 
-		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-		if (!pl)
-			return -ENOMEM;
-		pl_was_allocated = true;
-		pl->prog = prog;
-		list_add_tail(&pl->node, progs);
-	} else {
-		if (list_empty(progs)) {
-			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl)
-				return -ENOMEM;
-			pl_was_allocated = true;
-			list_add_tail(&pl->node, progs);
+	if (prog) {
+		overridable = new_overridable;
+		effective = prog;
+		if (old_prog &&
+		    cgrp->bpf.disallow_override[type] == new_overridable)
+			/* disallow attaching non-overridable on top
+			 * of existing overridable in this cgroup
+			 * and vice versa
+			 */
+			return -EPERM;
+	}
+
+	if (!prog && !old_prog)
+		/* report error when trying to detach and nothing is attached */
+		return -ENOENT;
+
+	cgrp->bpf.prog[type] = prog;
+
+	css_for_each_descendant_pre(pos, &cgrp->self) {
+		struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+		/* skip the subtree if the descendant has its own program */
+		if (desc->bpf.prog[type] && desc != cgrp) {
+			pos = css_rightmost_descendant(pos);
 		} else {
-			pl = list_first_entry(progs, typeof(*pl), node);
-			old_prog = pl->prog;
-			pl_was_allocated = false;
+			rcu_assign_pointer(desc->bpf.effective[type],
+					   effective);
+			desc->bpf.disallow_override[type] = !overridable;
 		}
-		pl->prog = prog;
 	}
 
-	old_flags = cgrp->bpf.flags[type];
-	cgrp->bpf.flags[type] = flags;
+	if (prog)
+		static_branch_inc(&cgroup_bpf_enabled_key);
 
-	/* allocate and recompute effective prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
-		if (err)
-			goto cleanup;
-	}
-
-	/* all allocations were successful. Activate all prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		activate_effective_progs(desc, type, desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	static_branch_inc(&cgroup_bpf_enabled_key);
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
 	return 0;
-
-cleanup:
-	/* oom while computing effective. Free all computed effective arrays
-	 * since they were not activated
-	 */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		bpf_prog_array_free(desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* and cleanup the prog list */
-	pl->prog = old_prog;
-	if (pl_was_allocated) {
-		list_del(&pl->node);
-		kfree(pl);
-	}
-	return err;
 }
 
-/**
- * __cgroup_bpf_detach() - Detach the program from a cgroup, and
- *                         propagate the change to descendants
- * @cgrp: The cgroup which descendants to traverse
- * @prog: A program to detach or NULL
- * @type: Type of detach operation
- *
- * Must be called with cgroup_mutex held.
- */
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 unused_flags)
-{
-	struct list_head *progs = &cgrp->bpf.progs[type];
-	u32 flags = cgrp->bpf.flags[type];
-	struct bpf_prog *old_prog = NULL;
-	struct cgroup_subsys_state *css;
-	struct bpf_prog_list *pl;
-	int err;
-
-	if (flags & BPF_F_ALLOW_MULTI) {
-		if (!prog)
-			/* to detach MULTI prog the user has to specify valid FD
-			 * of the program to be detached
-			 */
-			return -EINVAL;
-	} else {
-		if (list_empty(progs))
-			/* report error when trying to detach and nothing is attached */
-			return -ENOENT;
-	}
-
-	if (flags & BPF_F_ALLOW_MULTI) {
-		/* find the prog and detach it */
-		list_for_each_entry(pl, progs, node) {
-			if (pl->prog != prog)
-				continue;
-			old_prog = prog;
-			/* mark it deleted, so it's ignored while
-			 * recomputing effective
-			 */
-			pl->prog = NULL;
-			break;
-		}
-		if (!old_prog)
-			return -ENOENT;
-	} else {
-		/* to maintain backward compatibility NONE and OVERRIDE cgroups
-		 * allow detaching with invalid FD (prog==NULL)
-		 */
-		pl = list_first_entry(progs, typeof(*pl), node);
-		old_prog = pl->prog;
-		pl->prog = NULL;
-	}
-
-	/* allocate and recompute effective prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
-		if (err)
-			goto cleanup;
-	}
-
-	/* all allocations were successful. Activate all prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		activate_effective_progs(desc, type, desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* now can actually delete it from this cgroup list */
-	list_del(&pl->node);
-	kfree(pl);
-	if (list_empty(progs))
-		/* last program was detached, reset flags to zero */
-		cgrp->bpf.flags[type] = 0;
-
-	bpf_prog_put(old_prog);
-	static_branch_dec(&cgroup_bpf_enabled_key);
-	return 0;
-
-cleanup:
-	/* oom while computing effective. Free all computed effective arrays
-	 * since they were not activated
-	 */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		bpf_prog_array_free(desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* and restore back old_prog */
-	pl->prog = old_prog;
-	return err;
-}
-
 /**
@@ -403,25 +171,35 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 			    struct sk_buff *skb,
 			    enum bpf_attach_type type)
 {
-	unsigned int offset = skb->data - skb_network_header(skb);
-	struct sock *save_sk;
+	struct bpf_prog *prog;
 	struct cgroup *cgrp;
-	int ret;
+	int ret = 0;
 
 	if (!sk || !sk_fullsock(sk))
 		return 0;
 
-	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+	if (sk->sk_family != AF_INET &&
+	    sk->sk_family != AF_INET6)
 		return 0;
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	save_sk = skb->sk;
-	skb->sk = sk;
-	__skb_push(skb, offset);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 bpf_prog_run_save_cb);
-	__skb_pull(skb, offset);
-	skb->sk = save_sk;
-	return ret == 1 ? 0 : -EPERM;
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog) {
+		unsigned int offset = skb->data - skb_network_header(skb);
+		struct sock *save_sk = skb->sk;
+
+		skb->sk = sk;
+		__skb_push(skb, offset);
+		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+		__skb_pull(skb, offset);
+		skb->sk = save_sk;
+	}
+
+	rcu_read_unlock();
+
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter);
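Both the removed and the restored filter paths rely on the same verdict convention: the program returns 1 to allow the packet (the filter returns 0) and anything else to drop it (the filter returns -EPERM). A minimal BPF_PROG_TYPE_CGROUP_SKB program showing that convention (the SEC fallback is only for self-containment; a real build would use the toolchain's own helpers):

#include <linux/bpf.h>

#ifndef SEC
#define SEC(name) __attribute__((section(name), used))
#endif

SEC("cgroup/skb")
int egress_allow_small(struct __sk_buff *skb)
{
	/* 0 = drop (caller sees -EPERM), 1 = allow (caller sees 0) */
	return skb->len > 1500 ? 0 : 1;
}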
@@ -1080,37 +1080,6 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
-/* to avoid allocating empty bpf_prog_array for cgroups that
- * don't have bpf program attached use one global 'empty_prog_array'
- * It will not be modified the caller of bpf_prog_array_alloc()
- * (since caller requested prog_cnt == 0)
- * that pointer should be 'freed' by bpf_prog_array_free()
- */
-static struct {
-	struct bpf_prog_array hdr;
-	struct bpf_prog *null_prog;
-} empty_prog_array = {
-	.null_prog = NULL,
-};
-
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
-{
-	if (prog_cnt)
-		return kzalloc(sizeof(struct bpf_prog_array) +
-			       sizeof(struct bpf_prog *) * (prog_cnt + 1),
-			       flags);
-
-	return &empty_prog_array.hdr;
-}
-
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
-{
-	if (!progs ||
-	    progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
-		return;
-	kfree_rcu(progs, rcu);
-}
-
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
 	struct bpf_prog_aux *aux;
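The removed allocator avoids a heap allocation for the common empty case by handing every zero-length request the same static sentinel, whose terminating NULL is provided by a trailing struct member; the matching free must then recognize the sentinel and do nothing. A generic user-space sketch of the same pattern (stand-in types, not the kernel ones):

#include <stdlib.h>

struct ptr_array {
	void *ptrs[0];			/* NULL-terminated */
};

static struct {
	struct ptr_array hdr;
	void *null_ptr;			/* serves as hdr.ptrs[0] == NULL */
} empty_array;

static struct ptr_array *ptr_array_alloc(unsigned int cnt)
{
	if (cnt)
		return calloc(1, sizeof(struct ptr_array) +
				 (cnt + 1) * sizeof(void *));
	return &empty_array.hdr;	/* shared, read-only, never freed */
}

static void ptr_array_free(struct ptr_array *arr)
{
	if (!arr || arr == &empty_array.hdr)
		return;			/* never free the sentinel */
	free(arr);
}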
@@ -894,9 +894,6 @@ static int bpf_obj_get(const union bpf_attr *attr)
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-#define BPF_F_ATTACH_MASK \
-	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
-
 static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	struct bpf_prog *prog;
@@ -909,7 +906,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_PROG_ATTACH))
 		return -EINVAL;
 
-	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
+	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
 		return -EINVAL;
 
 	switch (attr->attach_type) {
@@ -926,8 +923,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return PTR_ERR(cgrp);
 	}
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
-				attr->attach_flags);
+	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
 	if (ret)
 		bpf_prog_put(prog);
 	cgroup_put(cgrp);
@@ -944,8 +941,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
-	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
 	struct cgroup *cgrp;
 	int ret;
 
@@ -958,25 +953,18 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	switch (attr->attach_type) {
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
-		ptype = BPF_PROG_TYPE_CGROUP_SKB;
+		cgrp = cgroup_get_from_fd(attr->target_fd);
+		if (IS_ERR(cgrp))
+			return PTR_ERR(cgrp);
+
+		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
+		cgroup_put(cgrp);
 		break;
+
 	default:
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		prog = NULL;
-
-	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
-	if (prog)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
 	return ret;
 }
 #endif /* CONFIG_CGROUP_BPF */
@@ -2020,9 +2020,6 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	if (ret)
 		goto destroy_root;
 
-	ret = cgroup_bpf_inherit(root_cgrp);
-	WARN_ON_ONCE(ret);
-
 	trace_cgroup_setup_root(root);
 
 	/*
@@ -5386,9 +5383,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 	cgrp->level = level;
-	ret = cgroup_bpf_inherit(cgrp);
-	if (ret)
-		goto out_idr_free;
 
 	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
 		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
@@ -5425,6 +5419,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 		goto out_idr_free;
 	}
 
+	if (parent)
+		cgroup_bpf_inherit(cgrp, parent);
+
 	cgroup_propagate_control(cgrp);
 
 	return cgrp;
@@ -6647,23 +6644,14 @@ static __init int cgroup_namespaces_init(void)
 subsys_initcall(cgroup_namespaces_init);
 
 #ifdef CONFIG_CGROUP_BPF
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable)
 {
+	struct cgroup *parent = cgroup_parent(cgrp);
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
-	mutex_unlock(&cgroup_mutex);
-	return ret;
-}
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
-{
-	int ret;
-
-	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }