Revert "BACKPORT: bpf: multi program support for cgroup+bpf"

This reverts commit 148f111e98.
Author: Mustafa Gökmen
Date:   2024-07-24 16:19:53 +03:00
Commit: 4fce632291 (parent: dd475f3220)
GPG key ID: 3204D8100CFF21ED (no known key found for this signature in database)

8 changed files with 142 additions and 501 deletions
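Note on the userspace-visible effect (not part of the original commit message): with the multi-program support in place, BPF_PROG_ATTACH accepts BPF_F_ALLOW_MULTI and a cgroup can hold several programs per attach type; after this revert a cgroup holds at most one program per attach type and attach_flags may only be 0 or BPF_F_ALLOW_OVERRIDE (any other bit fails with EINVAL, per the check restored in bpf_prog_attach() below). A minimal sketch of the attach call follows; the helper name and the way cgroup_fd/prog_fd are obtained are assumptions for illustration only:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: attach an already-loaded cgroup-skb program
 * (prog_fd from BPF_PROG_LOAD) to a cgroup directory fd.  With this
 * revert applied, attach_flags may only be 0 or BPF_F_ALLOW_OVERRIDE.
 */
static int cgroup_attach_egress(int cgroup_fd, int prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd     = cgroup_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type   = BPF_CGROUP_INET_EGRESS;
	attr.attach_flags  = BPF_F_ALLOW_OVERRIDE;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}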


@@ -13,42 +13,27 @@ struct sk_buff;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-struct bpf_prog_list {
-	struct list_head node;
-	struct bpf_prog *prog;
-};
-
-struct bpf_prog_array;
-
 struct cgroup_bpf {
-	/* array of effective progs in this cgroup */
-	struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
-
-	/* attached progs to this cgroup and attach flags
-	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
-	 * have either zero or one element
-	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
+	/*
+	 * Store two sets of bpf_prog pointers, one for programs that are
+	 * pinned directly to this cgroup, and one for those that are effective
+	 * when this cgroup is accessed.
 	 */
-	struct list_head progs[MAX_BPF_ATTACH_TYPE];
-	u32 flags[MAX_BPF_ATTACH_TYPE];
-
-	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array __rcu *inactive;
+	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+	struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+	bool disallow_override[MAX_BPF_ATTACH_TYPE];
 };
 
 void cgroup_bpf_put(struct cgroup *cgrp);
-int cgroup_bpf_inherit(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
 
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool overridable);
 
-/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable);
 
 int __cgroup_bpf_run_filter(struct sock *sk,
 			    struct sk_buff *skb,

@@ -81,7 +66,8 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
-static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+				      struct cgroup *parent) {}
 
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })


@@ -241,38 +241,6 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
 
-/* an array of programs to be executed under rcu_lock.
- *
- * Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
- *
- * the structure returned by bpf_prog_array_alloc() should be populated
- * with program pointers and the last pointer must be NULL.
- * The user has to keep refcnt on the program and make sure the program
- * is removed from the array before bpf_prog_put().
- * The 'struct bpf_prog_array *' should only be replaced with xchg()
- * since other cpus are walking the array of pointers in parallel.
- */
-struct bpf_prog_array {
-	struct rcu_head rcu;
-	struct bpf_prog *progs[0];
-};
-
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
-
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
-	({						\
-		struct bpf_prog **_prog;		\
-		u32 _ret = 1;				\
-		rcu_read_lock();			\
-		_prog = rcu_dereference(array)->progs;	\
-		for (; *_prog; _prog++)			\
-			_ret &= func(*_prog, ctx);	\
-		rcu_read_unlock();			\
-		_ret;					\
-	})
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
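The comment at the top of the block removed above describes the intended bpf_prog_array usage. A minimal sketch of that pattern, matching how the kernel/bpf/cgroup.c hunks in this diff use it — illustrative only, kernel context assumed, the function name is hypothetical, and the locking around the update is trimmed:

/* Illustrative fragment (not part of this commit): install a one-element
 * effective array and run it, using the bpf_prog_array API this revert
 * removes.
 */
static int example_run_effective(struct cgroup *cgrp, enum bpf_attach_type type,
				 struct bpf_prog *prog, struct sk_buff *skb)
{
	struct bpf_prog_array __rcu *array;
	u32 ret;

	array = bpf_prog_array_alloc(1, GFP_KERNEL);	/* one slot + NULL terminator */
	if (!array)
		return -ENOMEM;

	/* caller keeps a reference on prog while it sits in the array */
	rcu_dereference_protected(array, 1)->progs[0] = prog;

	/* replace the old array with xchg(); readers walk it under RCU,
	 * and bpf_prog_array_free() defers the kfree past a grace period
	 */
	bpf_prog_array_free(xchg(&cgrp->bpf.effective[type], array));

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
				 bpf_prog_run_save_cb);
	return ret == 1 ? 0 : -EPERM;
}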


@@ -423,7 +423,7 @@ struct sk_filter {
 	struct bpf_prog	*prog;
 };
 
-#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
+#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN


@@ -109,47 +109,11 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
- *
- * NONE(default): No further bpf programs allowed in the subtree.
- *
- * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
- * the program in this cgroup yields to sub-cgroup program.
- *
- * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
- * that cgroup program gets run in addition to the program in this cgroup.
- *
- * Only one program is allowed to be attached to a cgroup with
- * NONE or BPF_F_ALLOW_OVERRIDE flag.
- * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
- * release old program and attach the new one. Attach flags has to match.
- *
- * Multiple programs are allowed to be attached to a cgroup with
- * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
- * (those that were attached first, run first)
- * The programs of sub-cgroup are executed first, then programs of
- * this cgroup and then programs of parent cgroup.
- * When children program makes decision (like picking TCP CA or sock bind)
- * parent program has a chance to override it.
- *
- * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
- * A cgroup with NONE doesn't allow any programs in sub-cgroups.
- * Ex1:
- * cgrp1 (MULTI progs A, B) ->
- *    cgrp2 (OVERRIDE prog C) ->
- *      cgrp3 (MULTI prog D) ->
- *        cgrp4 (OVERRIDE prog E) ->
- *          cgrp5 (NONE prog F)
- * the event in cgrp5 triggers execution of F,D,A,B in that order.
- * if prog F is detached, the execution is E,D,A,B
- * if prog F and D are detached, the execution is E,A,B
- * if prog F, E and D are detached, the execution is C,A,B
- *
- * All eligible programs are executed regardless of return code from
- * earlier programs.
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
  */
 #define BPF_F_ALLOW_OVERRIDE (1U << 0)
-#define BPF_F_ALLOW_MULTI (1U << 1)
 
 #define BPF_PSEUDO_MAP_FD 1


@@ -27,361 +27,129 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 {
 	unsigned int type;
 
-	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
-		struct list_head *progs = &cgrp->bpf.progs[type];
-		struct bpf_prog_list *pl, *tmp;
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+		struct bpf_prog *prog = cgrp->bpf.prog[type];
 
-		list_for_each_entry_safe(pl, tmp, progs, node) {
-			list_del(&pl->node);
-			bpf_prog_put(pl->prog);
-			kfree(pl);
+		if (prog) {
+			bpf_prog_put(prog);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
-		bpf_prog_array_free(cgrp->bpf.effective[type]);
 	}
 }
 
-/* count number of elements in the list.
- * it's slow but the list cannot be long
- */
-static u32 prog_list_length(struct list_head *head)
-{
-	struct bpf_prog_list *pl;
-	u32 cnt = 0;
-
-	list_for_each_entry(pl, head, node) {
-		if (!pl->prog)
-			continue;
-		cnt++;
-	}
-	return cnt;
-}
-
-/* if parent has non-overridable prog attached,
- * disallow attaching new programs to the descendent cgroup.
- * if parent has overridable or multi-prog, allow attaching
- */
-static bool hierarchy_allows_attach(struct cgroup *cgrp,
-				    enum bpf_attach_type type,
-				    u32 new_flags)
-{
-	struct cgroup *p;
-
-	p = cgroup_parent(cgrp);
-	if (!p)
-		return true;
-	do {
-		u32 flags = p->bpf.flags[type];
-		u32 cnt;
-
-		if (flags & BPF_F_ALLOW_MULTI)
-			return true;
-		cnt = prog_list_length(&p->bpf.progs[type]);
-		WARN_ON_ONCE(cnt > 1);
-		if (cnt == 1)
-			return !!(flags & BPF_F_ALLOW_OVERRIDE);
-		p = cgroup_parent(p);
-	} while (p);
-	return true;
-}
-
-/* compute a chain of effective programs for a given cgroup:
- * start from the list of programs in this cgroup and add
- * all parent programs.
- * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
- * to programs in this cgroup
- */
-static int compute_effective_progs(struct cgroup *cgrp,
-				   enum bpf_attach_type type,
-				   struct bpf_prog_array __rcu **array)
-{
-	struct bpf_prog_array __rcu *progs;
-	struct bpf_prog_list *pl;
-	struct cgroup *p = cgrp;
-	int cnt = 0;
-
-	/* count number of effective programs by walking parents */
-	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			cnt += prog_list_length(&p->bpf.progs[type]);
-		p = cgroup_parent(p);
-	} while (p);
-
-	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
-	if (!progs)
-		return -ENOMEM;
-
-	/* populate the array with effective progs */
-	cnt = 0;
-	p = cgrp;
-	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			list_for_each_entry(pl,
-					    &p->bpf.progs[type], node) {
-				if (!pl->prog)
-					continue;
-				rcu_dereference_protected(progs, 1)->
-					progs[cnt++] = pl->prog;
-			}
-		p = cgroup_parent(p);
-	} while (p);
-
-	*array = progs;
-	return 0;
-}
-
-static void activate_effective_progs(struct cgroup *cgrp,
-				     enum bpf_attach_type type,
-				     struct bpf_prog_array __rcu *array)
-{
-	struct bpf_prog_array __rcu *old_array;
-
-	old_array = xchg(&cgrp->bpf.effective[type], array);
-	/* free prog array after grace period, since __cgroup_bpf_run_*()
-	 * might be still walking the array
-	 */
-	bpf_prog_array_free(old_array);
-}
-
 /**
  * cgroup_bpf_inherit() - inherit effective programs from parent
  * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
  */
-int cgroup_bpf_inherit(struct cgroup *cgrp)
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
 {
-/* has to use marco instead of const int, since compiler thinks
- * that array below is variable length
- */
-#define NR ARRAY_SIZE(cgrp->bpf.effective)
-	struct bpf_prog_array __rcu *arrays[NR] = {};
-	int i;
+	unsigned int type;
 
-	for (i = 0; i < NR; i++)
-		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+		struct bpf_prog *e;
 
-	for (i = 0; i < NR; i++)
-		if (compute_effective_progs(cgrp, i, &arrays[i]))
-			goto cleanup;
-
-	for (i = 0; i < NR; i++)
-		activate_effective_progs(cgrp, i, arrays[i]);
-
-	return 0;
-cleanup:
-	for (i = 0; i < NR; i++)
-		bpf_prog_array_free(arrays[i]);
-	return -ENOMEM;
+		e = rcu_dereference_protected(parent->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+		rcu_assign_pointer(cgrp->bpf.effective[type], e);
+		cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
+	}
 }
 
-#define BPF_CGROUP_MAX_PROGS 64
-
 /**
- * __cgroup_bpf_attach() - Attach the program to a cgroup, and
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
  *                         propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
- * @prog: A program to attach
- * @type: Type of attach operation
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
+ *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and which is effective for execution.
+ *
+ * If @prog is not %NULL, this function attaches a new program to the cgroup
+ * and releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
  *
  * Must be called with cgroup_mutex held.
  */
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags)
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool new_overridable)
 {
-	struct list_head *progs = &cgrp->bpf.progs[type];
-	struct bpf_prog *old_prog = NULL;
-	struct cgroup_subsys_state *css;
-	struct bpf_prog_list *pl;
-	bool pl_was_allocated;
-	u32 old_flags;
-	int err;
+	struct bpf_prog *old_prog, *effective = NULL;
+	struct cgroup_subsys_state *pos;
+	bool overridable = true;
 
-	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
-		/* invalid combination */
-		return -EINVAL;
-
-	if (!hierarchy_allows_attach(cgrp, type, flags))
-		return -EPERM;
-
-	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
-		/* Disallow attaching non-overridable on top
-		 * of existing overridable in this cgroup.
-		 * Disallow attaching multi-prog if overridable or none
+	if (parent) {
+		overridable = !parent->bpf.disallow_override[type];
+		effective = rcu_dereference_protected(parent->bpf.effective[type],
+						      lockdep_is_held(&cgroup_mutex));
+	}
+
+	if (prog && effective && !overridable)
+		/* if parent has non-overridable prog attached, disallow
+		 * attaching new programs to descendent cgroup
 		 */
 		return -EPERM;
 
-	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
-		return -E2BIG;
-
-	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node)
-			if (pl->prog == prog)
-				/* disallow attaching the same prog twice */
-				return -EINVAL;
-
-		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-		if (!pl)
-			return -ENOMEM;
-		pl_was_allocated = true;
-		pl->prog = prog;
-		list_add_tail(&pl->node, progs);
-	} else {
-		if (list_empty(progs)) {
-			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl)
-				return -ENOMEM;
-			pl_was_allocated = true;
-			list_add_tail(&pl->node, progs);
-		} else {
-			pl = list_first_entry(progs, typeof(*pl), node);
-			old_prog = pl->prog;
-			pl_was_allocated = false;
-		}
-		pl->prog = prog;
-	}
-
-	old_flags = cgrp->bpf.flags[type];
-	cgrp->bpf.flags[type] = flags;
-
-	/* allocate and recompute effective prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
-		if (err)
-			goto cleanup;
-	}
-
-	/* all allocations were successful. Activate all prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		activate_effective_progs(desc, type, desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	static_branch_inc(&cgroup_bpf_enabled_key);
+	if (prog && effective && overridable != new_overridable)
+		/* if parent has overridable prog attached, only
+		 * allow overridable programs in descendent cgroup
+		 */
+		return -EPERM;
+
+	old_prog = cgrp->bpf.prog[type];
+
+	if (prog) {
+		overridable = new_overridable;
+		effective = prog;
+		if (old_prog &&
+		    cgrp->bpf.disallow_override[type] == new_overridable)
+			/* disallow attaching non-overridable on top
+			 * of existing overridable in this cgroup
+			 * and vice versa
+			 */
+			return -EPERM;
+	}
+
+	if (!prog && !old_prog)
+		/* report error when trying to detach and nothing is attached */
+		return -ENOENT;
+
+	cgrp->bpf.prog[type] = prog;
+
+	css_for_each_descendant_pre(pos, &cgrp->self) {
+		struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+		/* skip the subtree if the descendant has its own program */
+		if (desc->bpf.prog[type] && desc != cgrp) {
+			pos = css_rightmost_descendant(pos);
+		} else {
+			rcu_assign_pointer(desc->bpf.effective[type],
+					   effective);
+			desc->bpf.disallow_override[type] = !overridable;
+		}
+	}
+
+	if (prog)
+		static_branch_inc(&cgroup_bpf_enabled_key);
+
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
 	return 0;
-
-cleanup:
-	/* oom while computing effective. Free all computed effective arrays
-	 * since they were not activated
-	 */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		bpf_prog_array_free(desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* and cleanup the prog list */
-	pl->prog = old_prog;
-	if (pl_was_allocated) {
-		list_del(&pl->node);
-		kfree(pl);
-	}
-	return err;
 }
 
-/**
- * __cgroup_bpf_detach() - Detach the program from a cgroup, and
- *                         propagate the change to descendants
- * @cgrp: The cgroup which descendants to traverse
- * @prog: A program to detach or NULL
- * @type: Type of detach operation
- *
- * Must be called with cgroup_mutex held.
- */
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 unused_flags)
-{
-	struct list_head *progs = &cgrp->bpf.progs[type];
-	u32 flags = cgrp->bpf.flags[type];
-	struct bpf_prog *old_prog = NULL;
-	struct cgroup_subsys_state *css;
-	struct bpf_prog_list *pl;
-	int err;
-
-	if (flags & BPF_F_ALLOW_MULTI) {
-		if (!prog)
-			/* to detach MULTI prog the user has to specify valid FD
-			 * of the program to be detached
-			 */
-			return -EINVAL;
-	} else {
-		if (list_empty(progs))
-			/* report error when trying to detach and nothing is attached */
-			return -ENOENT;
-	}
-
-	if (flags & BPF_F_ALLOW_MULTI) {
-		/* find the prog and detach it */
-		list_for_each_entry(pl, progs, node) {
-			if (pl->prog != prog)
-				continue;
-			old_prog = prog;
-			/* mark it deleted, so it's ignored while
-			 * recomputing effective
-			 */
-			pl->prog = NULL;
-			break;
-		}
-		if (!old_prog)
-			return -ENOENT;
-	} else {
-		/* to maintain backward compatibility NONE and OVERRIDE cgroups
-		 * allow detaching with invalid FD (prog==NULL)
-		 */
-		pl = list_first_entry(progs, typeof(*pl), node);
-		old_prog = pl->prog;
-		pl->prog = NULL;
-	}
-
-	/* allocate and recompute effective prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
-		if (err)
-			goto cleanup;
-	}
-
-	/* all allocations were successful. Activate all prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		activate_effective_progs(desc, type, desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* now can actually delete it from this cgroup list */
-	list_del(&pl->node);
-	kfree(pl);
-	if (list_empty(progs))
-		/* last program was detached, reset flags to zero */
-		cgrp->bpf.flags[type] = 0;
-
-	bpf_prog_put(old_prog);
-	static_branch_dec(&cgroup_bpf_enabled_key);
-	return 0;
-
-cleanup:
-	/* oom while computing effective. Free all computed effective arrays
-	 * since they were not activated
-	 */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		bpf_prog_array_free(desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* and restore back old_prog */
-	pl->prog = old_prog;
-	return err;
-}
-
 /**
@@ -403,25 +171,35 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 			    struct sk_buff *skb,
 			    enum bpf_attach_type type)
 {
-	unsigned int offset = skb->data - skb_network_header(skb);
-	struct sock *save_sk;
+	struct bpf_prog *prog;
 	struct cgroup *cgrp;
-	int ret;
+	int ret = 0;
 
 	if (!sk || !sk_fullsock(sk))
 		return 0;
 
-	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+	if (sk->sk_family != AF_INET &&
+	    sk->sk_family != AF_INET6)
 		return 0;
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	save_sk = skb->sk;
-	skb->sk = sk;
-	__skb_push(skb, offset);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 bpf_prog_run_save_cb);
-	__skb_pull(skb, offset);
-	skb->sk = save_sk;
-	return ret == 1 ? 0 : -EPERM;
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog) {
+		unsigned int offset = skb->data - skb_network_header(skb);
+		struct sock *save_sk = skb->sk;
+
+		skb->sk = sk;
+		__skb_push(skb, offset);
+		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+		__skb_pull(skb, offset);
+		skb->sk = save_sk;
+	}
+
+	rcu_read_unlock();
+
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter);


@@ -1080,37 +1080,6 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
-/* to avoid allocating empty bpf_prog_array for cgroups that
- * don't have bpf program attached use one global 'empty_prog_array'
- * It will not be modified the caller of bpf_prog_array_alloc()
- * (since caller requested prog_cnt == 0)
- * that pointer should be 'freed' by bpf_prog_array_free()
- */
-static struct {
-	struct bpf_prog_array hdr;
-	struct bpf_prog *null_prog;
-} empty_prog_array = {
-	.null_prog = NULL,
-};
-
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
-{
-	if (prog_cnt)
-		return kzalloc(sizeof(struct bpf_prog_array) +
-			       sizeof(struct bpf_prog *) * (prog_cnt + 1),
-			       flags);
-	return &empty_prog_array.hdr;
-}
-
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
-{
-	if (!progs ||
-	    progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
-		return;
-	kfree_rcu(progs, rcu);
-}
-
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
 	struct bpf_prog_aux *aux;


@@ -894,9 +894,6 @@ static int bpf_obj_get(const union bpf_attr *attr)
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-#define BPF_F_ATTACH_MASK \
-	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
-
 static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	struct bpf_prog *prog;

@@ -909,7 +906,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_PROG_ATTACH))
 		return -EINVAL;
 
-	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
+	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
 		return -EINVAL;
 
 	switch (attr->attach_type) {

@@ -926,8 +923,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 			return PTR_ERR(cgrp);
 		}
 
-		ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
-					attr->attach_flags);
+		ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+					attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
 		if (ret)
 			bpf_prog_put(prog);
 		cgroup_put(cgrp);

@@ -944,8 +941,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
-	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
 	struct cgroup *cgrp;
 	int ret;

@@ -958,25 +953,18 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	switch (attr->attach_type) {
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
-		ptype = BPF_PROG_TYPE_CGROUP_SKB;
+		cgrp = cgroup_get_from_fd(attr->target_fd);
+		if (IS_ERR(cgrp))
+			return PTR_ERR(cgrp);
+
+		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
+		cgroup_put(cgrp);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		prog = NULL;
-
-	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
-	if (prog)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
 	return ret;
 }
 
 #endif /* CONFIG_CGROUP_BPF */


@@ -2020,9 +2020,6 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	if (ret)
 		goto destroy_root;
 
-	ret = cgroup_bpf_inherit(root_cgrp);
-	WARN_ON_ONCE(ret);
-
 	trace_cgroup_setup_root(root);
 
 	/*

@@ -5386,9 +5383,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 	cgrp->level = level;
 
-	ret = cgroup_bpf_inherit(cgrp);
-	if (ret)
-		goto out_idr_free;
-
 	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
 		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;

@@ -5425,6 +5419,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 			goto out_idr_free;
 	}
 
+	if (parent)
+		cgroup_bpf_inherit(cgrp, parent);
+
 	cgroup_propagate_control(cgrp);
 
 	return cgrp;

@@ -6647,23 +6644,14 @@ static __init int cgroup_namespaces_init(void)
 subsys_initcall(cgroup_namespaces_init);
 
 #ifdef CONFIG_CGROUP_BPF
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable)
 {
+	struct cgroup *parent = cgroup_parent(cgrp);
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
-	mutex_unlock(&cgroup_mutex);
-	return ret;
-}
-
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
-{
-	int ret;
-
-	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }