Revert "BACKPORT: bpf: multi program support for cgroup+bpf"
This reverts commit 148f111e98.
parent dd475f3220
commit 4fce632291

8 changed files with 142 additions and 501 deletions
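For context, the API being removed: with the multi-prog backport in place, user space could pass BPF_F_ALLOW_MULTI to BPF_PROG_ATTACH; after this revert only the BPF_F_ALLOW_OVERRIDE bit remains valid. A minimal user-space sketch of the attach call (the cgroup_fd and prog_fd descriptors are hypothetical; prog_fd is assumed to come from a successful BPF_PROG_LOAD of a BPF_PROG_TYPE_CGROUP_SKB program):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int cgroup_attach(int cgroup_fd, int prog_fd, __u32 flags)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = cgroup_fd;	/* cgroup directory fd */
	attr.attach_bpf_fd = prog_fd;	/* loaded BPF program fd */
	attr.attach_type = BPF_CGROUP_INET_EGRESS;
	attr.attach_flags = flags;	/* BPF_F_ALLOW_MULTI is rejected after this revert */

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}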
@@ -13,42 +13,27 @@ struct sk_buff;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-struct bpf_prog_list {
-	struct list_head node;
-	struct bpf_prog *prog;
-};
-
-struct bpf_prog_array;
-
 struct cgroup_bpf {
-	/* array of effective progs in this cgroup */
-	struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
-
-	/* attached progs to this cgroup and attach flags
-	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
-	 * have either zero or one element
-	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
+	/*
+	 * Store two sets of bpf_prog pointers, one for programs that are
+	 * pinned directly to this cgroup, and one for those that are effective
+	 * when this cgroup is accessed.
 	 */
-	struct list_head progs[MAX_BPF_ATTACH_TYPE];
-	u32 flags[MAX_BPF_ATTACH_TYPE];
-
-	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array __rcu *inactive;
+	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+	struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+	bool disallow_override[MAX_BPF_ATTACH_TYPE];
 };
 
 void cgroup_bpf_put(struct cgroup *cgrp);
-int cgroup_bpf_inherit(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
 
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool overridable);
 
-/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable);
 
 int __cgroup_bpf_run_filter(struct sock *sk,
 			    struct sk_buff *skb,
@@ -81,7 +66,8 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
-static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+				      struct cgroup *parent) {}
 
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
@@ -241,38 +241,6 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
 
-/* an array of programs to be executed under rcu_lock.
- *
- * Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
- *
- * the structure returned by bpf_prog_array_alloc() should be populated
- * with program pointers and the last pointer must be NULL.
- * The user has to keep refcnt on the program and make sure the program
- * is removed from the array before bpf_prog_put().
- * The 'struct bpf_prog_array *' should only be replaced with xchg()
- * since other cpus are walking the array of pointers in parallel.
- */
-struct bpf_prog_array {
-	struct rcu_head rcu;
-	struct bpf_prog *progs[0];
-};
-
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
-
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
-	({						\
-		struct bpf_prog **_prog;		\
-		u32 _ret = 1;				\
-		rcu_read_lock();			\
-		_prog = rcu_dereference(array)->progs;	\
-		for (; *_prog; _prog++)			\
-			_ret &= func(*_prog, ctx);	\
-		rcu_read_unlock();			\
-		_ret;					\
-	 })
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
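The macro removed above ANDs together the return codes of every program in the NULL-terminated array, so a single 0 (drop) verdict overrides any number of 1 (allow) verdicts. A minimal sketch of that combining rule outside the kernel, with a function-pointer stub standing in for bpf_prog and BPF_PROG_RUN:

typedef unsigned int (*stub_prog_t)(void *ctx);

/* Mirror of the removed BPF_PROG_RUN_ARRAY loop: start from 1 (allow)
 * and AND in each program's verdict; any 0 makes the final result 0.
 */
static unsigned int run_array(stub_prog_t *progs, void *ctx)
{
	unsigned int ret = 1;

	for (; *progs; progs++)		/* array is NULL-terminated */
		ret &= (*progs)(ctx);
	return ret;
}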
@@ -423,7 +423,7 @@ struct sk_filter {
 	struct bpf_prog	*prog;
 };
 
-#define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
+#define BPF_PROG_RUN(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
@@ -109,47 +109,11 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
- *
- * NONE(default): No further bpf programs allowed in the subtree.
- *
- * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
- * the program in this cgroup yields to sub-cgroup program.
- *
- * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
- * that cgroup program gets run in addition to the program in this cgroup.
- *
- * Only one program is allowed to be attached to a cgroup with
- * NONE or BPF_F_ALLOW_OVERRIDE flag.
- * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
- * release old program and attach the new one. Attach flags has to match.
- *
- * Multiple programs are allowed to be attached to a cgroup with
- * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
- * (those that were attached first, run first)
- * The programs of sub-cgroup are executed first, then programs of
- * this cgroup and then programs of parent cgroup.
- * When children program makes decision (like picking TCP CA or sock bind)
- * parent program has a chance to override it.
- *
- * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
- * A cgroup with NONE doesn't allow any programs in sub-cgroups.
- * Ex1:
- * cgrp1 (MULTI progs A, B) ->
- *    cgrp2 (OVERRIDE prog C) ->
- *      cgrp3 (MULTI prog D) ->
- *        cgrp4 (OVERRIDE prog E) ->
- *          cgrp5 (NONE prog F)
- * the event in cgrp5 triggers execution of F,D,A,B in that order.
- * if prog F is detached, the execution is E,D,A,B
- * if prog F and D are detached, the execution is E,A,B
- * if prog F, E and D are detached, the execution is C,A,B
- *
- * All eligible programs are executed regardless of return code from
- * earlier programs.
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
-#define BPF_F_ALLOW_MULTI	(1U << 1)
 
 #define BPF_PSEUDO_MAP_FD	1
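The Ex1 ordering in the comment removed above follows from one rule: walk from the cgroup where the event fired up to the root, taking a level's programs only while the chain is still empty or that level was attached with BPF_F_ALLOW_MULTI. A small self-contained sketch of that rule (illustrative types, not kernel API):

struct fake_cgroup {
	struct fake_cgroup *parent;
	const char *progs;	/* program names attached at this level, e.g. "AB" */
	int multi;		/* attached with BPF_F_ALLOW_MULTI? */
};

static void effective_chain(struct fake_cgroup *cg, char *out)
{
	unsigned int cnt = 0;

	for (; cg; cg = cg->parent)
		if (cnt == 0 || cg->multi)	/* same test as the multi-prog kernel code */
			for (const char *p = cg->progs; *p; p++)
				out[cnt++] = *p;
	out[cnt] = '\0';
}

/* For Ex1 above, starting at cgrp5 this yields "FDAB": F from cgrp5,
 * D from cgrp3 (MULTI), A and B from cgrp1 (MULTI); the OVERRIDE levels
 * cgrp4 and cgrp2 are skipped because the chain is no longer empty.
 */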
@@ -27,361 +27,129 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 {
 	unsigned int type;
 
-	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
-		struct list_head *progs = &cgrp->bpf.progs[type];
-		struct bpf_prog_list *pl, *tmp;
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+		struct bpf_prog *prog = cgrp->bpf.prog[type];
 
-		list_for_each_entry_safe(pl, tmp, progs, node) {
-			list_del(&pl->node);
-			bpf_prog_put(pl->prog);
-			kfree(pl);
+		if (prog) {
+			bpf_prog_put(prog);
+			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
-		bpf_prog_array_free(cgrp->bpf.effective[type]);
 	}
 }
 
-/* count number of elements in the list.
- * it's slow but the list cannot be long
- */
-static u32 prog_list_length(struct list_head *head)
-{
-	struct bpf_prog_list *pl;
-	u32 cnt = 0;
-
-	list_for_each_entry(pl, head, node) {
-		if (!pl->prog)
-			continue;
-		cnt++;
-	}
-	return cnt;
-}
-
-/* if parent has non-overridable prog attached,
- * disallow attaching new programs to the descendent cgroup.
- * if parent has overridable or multi-prog, allow attaching
- */
-static bool hierarchy_allows_attach(struct cgroup *cgrp,
-				    enum bpf_attach_type type,
-				    u32 new_flags)
-{
-	struct cgroup *p;
-
-	p = cgroup_parent(cgrp);
-	if (!p)
-		return true;
-	do {
-		u32 flags = p->bpf.flags[type];
-		u32 cnt;
-
-		if (flags & BPF_F_ALLOW_MULTI)
-			return true;
-		cnt = prog_list_length(&p->bpf.progs[type]);
-		WARN_ON_ONCE(cnt > 1);
-		if (cnt == 1)
-			return !!(flags & BPF_F_ALLOW_OVERRIDE);
-		p = cgroup_parent(p);
-	} while (p);
-	return true;
-}
-
-/* compute a chain of effective programs for a given cgroup:
- * start from the list of programs in this cgroup and add
- * all parent programs.
- * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
- * to programs in this cgroup
- */
-static int compute_effective_progs(struct cgroup *cgrp,
-				   enum bpf_attach_type type,
-				   struct bpf_prog_array __rcu **array)
-{
-	struct bpf_prog_array __rcu *progs;
-	struct bpf_prog_list *pl;
-	struct cgroup *p = cgrp;
-	int cnt = 0;
-
-	/* count number of effective programs by walking parents */
-	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			cnt += prog_list_length(&p->bpf.progs[type]);
-		p = cgroup_parent(p);
-	} while (p);
-
-	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
-	if (!progs)
-		return -ENOMEM;
-
-	/* populate the array with effective progs */
-	cnt = 0;
-	p = cgrp;
-	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			list_for_each_entry(pl,
-					    &p->bpf.progs[type], node) {
-				if (!pl->prog)
-					continue;
-				rcu_dereference_protected(progs, 1)->
-					progs[cnt++] = pl->prog;
-			}
-		p = cgroup_parent(p);
-	} while (p);
-
-	*array = progs;
-	return 0;
-}
-
-static void activate_effective_progs(struct cgroup *cgrp,
-				     enum bpf_attach_type type,
-				     struct bpf_prog_array __rcu *array)
-{
-	struct bpf_prog_array __rcu *old_array;
-
-	old_array = xchg(&cgrp->bpf.effective[type], array);
-	/* free prog array after grace period, since __cgroup_bpf_run_*()
-	 * might be still walking the array
-	 */
-	bpf_prog_array_free(old_array);
-}
-
 /**
  * cgroup_bpf_inherit() - inherit effective programs from parent
  * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
  */
-int cgroup_bpf_inherit(struct cgroup *cgrp)
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
 {
-/* has to use marco instead of const int, since compiler thinks
- * that array below is variable length
- */
-#define	NR ARRAY_SIZE(cgrp->bpf.effective)
-	struct bpf_prog_array __rcu *arrays[NR] = {};
-	int i;
+	unsigned int type;
 
-	for (i = 0; i < NR; i++)
-		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+		struct bpf_prog *e;
 
-	for (i = 0; i < NR; i++)
-		if (compute_effective_progs(cgrp, i, &arrays[i]))
-			goto cleanup;
-
-	for (i = 0; i < NR; i++)
-		activate_effective_progs(cgrp, i, arrays[i]);
-
-	return 0;
-cleanup:
-	for (i = 0; i < NR; i++)
-		bpf_prog_array_free(arrays[i]);
-	return -ENOMEM;
+		e = rcu_dereference_protected(parent->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+		rcu_assign_pointer(cgrp->bpf.effective[type], e);
+		cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
+	}
 }
 
-#define BPF_CGROUP_MAX_PROGS 64
-
 /**
- * __cgroup_bpf_attach() - Attach the program to a cgroup, and
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
  *                         propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
- * @prog: A program to attach
- * @type: Type of attach operation
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
  *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and which is effective for execution.
+ *
+ * If @prog is not %NULL, this function attaches a new program to the cgroup
+ * and releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
+ *
 * Must be called with cgroup_mutex held.
 */
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags)
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool new_overridable)
 {
-	struct list_head *progs = &cgrp->bpf.progs[type];
-	struct bpf_prog *old_prog = NULL;
-	struct cgroup_subsys_state *css;
-	struct bpf_prog_list *pl;
-	bool pl_was_allocated;
-	u32 old_flags;
-	int err;
+	struct bpf_prog *old_prog, *effective = NULL;
+	struct cgroup_subsys_state *pos;
+	bool overridable = true;
 
-	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
-		/* invalid combination */
-		return -EINVAL;
+	if (parent) {
+		overridable = !parent->bpf.disallow_override[type];
+		effective = rcu_dereference_protected(parent->bpf.effective[type],
+						      lockdep_is_held(&cgroup_mutex));
+	}
 
-	if (!hierarchy_allows_attach(cgrp, type, flags))
-		return -EPERM;
-
-	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
-		/* Disallow attaching non-overridable on top
-		 * of existing overridable in this cgroup.
-		 * Disallow attaching multi-prog if overridable or none
+	if (prog && effective && !overridable)
+		/* if parent has non-overridable prog attached, disallow
+		 * attaching new programs to descendent cgroup
 		 */
 		return -EPERM;
 
-	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
-		return -E2BIG;
+	if (prog && effective && overridable != new_overridable)
+		/* if parent has overridable prog attached, only
+		 * allow overridable programs in descendent cgroup
+		 */
+		return -EPERM;
 
-	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node)
-			if (pl->prog == prog)
-				/* disallow attaching the same prog twice */
-				return -EINVAL;
+	old_prog = cgrp->bpf.prog[type];
 
-		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-		if (!pl)
-			return -ENOMEM;
-		pl_was_allocated = true;
-		pl->prog = prog;
-		list_add_tail(&pl->node, progs);
-	} else {
-		if (list_empty(progs)) {
-			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl)
-				return -ENOMEM;
-			pl_was_allocated = true;
-			list_add_tail(&pl->node, progs);
+	if (prog) {
+		overridable = new_overridable;
+		effective = prog;
+		if (old_prog &&
+		    cgrp->bpf.disallow_override[type] == new_overridable)
+			/* disallow attaching non-overridable on top
+			 * of existing overridable in this cgroup
+			 * and vice versa
+			 */
+			return -EPERM;
+	}
+
+	if (!prog && !old_prog)
+		/* report error when trying to detach and nothing is attached */
+		return -ENOENT;
+
+	cgrp->bpf.prog[type] = prog;
+
+	css_for_each_descendant_pre(pos, &cgrp->self) {
+		struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+		/* skip the subtree if the descendant has its own program */
+		if (desc->bpf.prog[type] && desc != cgrp) {
+			pos = css_rightmost_descendant(pos);
 		} else {
-			pl = list_first_entry(progs, typeof(*pl), node);
-			old_prog = pl->prog;
-			pl_was_allocated = false;
+			rcu_assign_pointer(desc->bpf.effective[type],
+					   effective);
+			desc->bpf.disallow_override[type] = !overridable;
 		}
-		pl->prog = prog;
 	}
 
-	old_flags = cgrp->bpf.flags[type];
-	cgrp->bpf.flags[type] = flags;
+	if (prog)
+		static_branch_inc(&cgroup_bpf_enabled_key);
 
-	/* allocate and recompute effective prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
-		if (err)
-			goto cleanup;
-	}
-
-	/* all allocations were successful. Activate all prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		activate_effective_progs(desc, type, desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	static_branch_inc(&cgroup_bpf_enabled_key);
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
 	return 0;
-
-cleanup:
-	/* oom while computing effective. Free all computed effective arrays
-	 * since they were not activated
-	 */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		bpf_prog_array_free(desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* and cleanup the prog list */
-	pl->prog = old_prog;
-	if (pl_was_allocated) {
-		list_del(&pl->node);
-		kfree(pl);
-	}
-	return err;
 }
 
-/**
- * __cgroup_bpf_detach() - Detach the program from a cgroup, and
- *                         propagate the change to descendants
- * @cgrp: The cgroup which descendants to traverse
- * @prog: A program to detach or NULL
- * @type: Type of detach operation
- *
- * Must be called with cgroup_mutex held.
- */
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 unused_flags)
-{
-	struct list_head *progs = &cgrp->bpf.progs[type];
-	u32 flags = cgrp->bpf.flags[type];
-	struct bpf_prog *old_prog = NULL;
-	struct cgroup_subsys_state *css;
-	struct bpf_prog_list *pl;
-	int err;
-
-	if (flags & BPF_F_ALLOW_MULTI) {
-		if (!prog)
-			/* to detach MULTI prog the user has to specify valid FD
-			 * of the program to be detached
-			 */
-			return -EINVAL;
-	} else {
-		if (list_empty(progs))
-			/* report error when trying to detach and nothing is attached */
-			return -ENOENT;
-	}
-
-	if (flags & BPF_F_ALLOW_MULTI) {
-		/* find the prog and detach it */
-		list_for_each_entry(pl, progs, node) {
-			if (pl->prog != prog)
-				continue;
-			old_prog = prog;
-			/* mark it deleted, so it's ignored while
-			 * recomputing effective
-			 */
-			pl->prog = NULL;
-			break;
-		}
-		if (!old_prog)
-			return -ENOENT;
-	} else {
-		/* to maintain backward compatibility NONE and OVERRIDE cgroups
-		 * allow detaching with invalid FD (prog==NULL)
-		 */
-		pl = list_first_entry(progs, typeof(*pl), node);
-		old_prog = pl->prog;
-		pl->prog = NULL;
-	}
-
-	/* allocate and recompute effective prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
-		if (err)
-			goto cleanup;
-	}
-
-	/* all allocations were successful. Activate all prog arrays */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		activate_effective_progs(desc, type, desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* now can actually delete it from this cgroup list */
-	list_del(&pl->node);
-	kfree(pl);
-	if (list_empty(progs))
-		/* last program was detached, reset flags to zero */
-		cgrp->bpf.flags[type] = 0;
-
-	bpf_prog_put(old_prog);
-	static_branch_dec(&cgroup_bpf_enabled_key);
-	return 0;
-
-cleanup:
-	/* oom while computing effective. Free all computed effective arrays
-	 * since they were not activated
-	 */
-	css_for_each_descendant_pre(css, &cgrp->self) {
-		struct cgroup *desc = container_of(css, struct cgroup, self);
-
-		bpf_prog_array_free(desc->bpf.inactive);
-		desc->bpf.inactive = NULL;
-	}
-
-	/* and restore back old_prog */
-	pl->prog = old_prog;
-	return err;
-}
-
 /**
@@ -403,25 +171,35 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 			    struct sk_buff *skb,
 			    enum bpf_attach_type type)
 {
-	unsigned int offset = skb->data - skb_network_header(skb);
-	struct sock *save_sk;
+	struct bpf_prog *prog;
 	struct cgroup *cgrp;
-	int ret;
+	int ret = 0;
 
 	if (!sk || !sk_fullsock(sk))
 		return 0;
 
-	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+	if (sk->sk_family != AF_INET &&
+	    sk->sk_family != AF_INET6)
 		return 0;
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	save_sk = skb->sk;
-	skb->sk = sk;
-	__skb_push(skb, offset);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 bpf_prog_run_save_cb);
-	__skb_pull(skb, offset);
-	skb->sk = save_sk;
-	return ret == 1 ? 0 : -EPERM;
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog) {
+		unsigned int offset = skb->data - skb_network_header(skb);
+		struct sock *save_sk = skb->sk;
+
+		skb->sk = sk;
+		__skb_push(skb, offset);
+		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+		__skb_pull(skb, offset);
+		skb->sk = save_sk;
+	}
+
+	rcu_read_unlock();
+
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter);
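Both the removed and the restored filter paths rely on the same verdict convention: the program returns 1 to allow the packet (the filter returns 0) and anything else to drop it (the filter returns -EPERM). A minimal BPF_PROG_TYPE_CGROUP_SKB program showing that convention (the SEC fallback is only for self-containment; a real build would use the toolchain's own helpers):

#include <linux/bpf.h>

#ifndef SEC
#define SEC(name) __attribute__((section(name), used))
#endif

SEC("cgroup/skb")
int egress_allow_small(struct __sk_buff *skb)
{
	/* 0 = drop (caller sees -EPERM), 1 = allow (caller sees 0) */
	return skb->len > 1500 ? 0 : 1;
}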
@@ -1080,37 +1080,6 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
-/* to avoid allocating empty bpf_prog_array for cgroups that
- * don't have bpf program attached use one global 'empty_prog_array'
- * It will not be modified the caller of bpf_prog_array_alloc()
- * (since caller requested prog_cnt == 0)
- * that pointer should be 'freed' by bpf_prog_array_free()
- */
-static struct {
-	struct bpf_prog_array hdr;
-	struct bpf_prog *null_prog;
-} empty_prog_array = {
-	.null_prog = NULL,
-};
-
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
-{
-	if (prog_cnt)
-		return kzalloc(sizeof(struct bpf_prog_array) +
-			       sizeof(struct bpf_prog *) * (prog_cnt + 1),
-			       flags);
-
-	return &empty_prog_array.hdr;
-}
-
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
-{
-	if (!progs ||
-	    progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
-		return;
-	kfree_rcu(progs, rcu);
-}
-
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
 	struct bpf_prog_aux *aux;
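The removed allocator avoids a heap allocation for the common empty case by handing every zero-length request the same static sentinel, whose terminating NULL is provided by a trailing struct member; the matching free must then recognize the sentinel and do nothing. A generic user-space sketch of the same pattern (stand-in types, not the kernel ones):

#include <stdlib.h>

struct ptr_array {
	void *ptrs[0];			/* NULL-terminated */
};

static struct {
	struct ptr_array hdr;
	void *null_ptr;			/* serves as hdr.ptrs[0] == NULL */
} empty_array;

static struct ptr_array *ptr_array_alloc(unsigned int cnt)
{
	if (cnt)
		return calloc(1, sizeof(struct ptr_array) +
				 (cnt + 1) * sizeof(void *));
	return &empty_array.hdr;	/* shared, read-only, never freed */
}

static void ptr_array_free(struct ptr_array *arr)
{
	if (!arr || arr == &empty_array.hdr)
		return;			/* never free the sentinel */
	free(arr);
}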
@@ -894,9 +894,6 @@ static int bpf_obj_get(const union bpf_attr *attr)
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-#define BPF_F_ATTACH_MASK \
-	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
-
 static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	struct bpf_prog *prog;
@@ -909,7 +906,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_PROG_ATTACH))
 		return -EINVAL;
 
-	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
+	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
 		return -EINVAL;
 
 	switch (attr->attach_type) {
@@ -926,8 +923,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return PTR_ERR(cgrp);
 	}
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
-				attr->attach_flags);
+	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
 	if (ret)
 		bpf_prog_put(prog);
 	cgroup_put(cgrp);
@@ -944,8 +941,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
-	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
 	struct cgroup *cgrp;
 	int ret;
 
@@ -958,25 +953,18 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	switch (attr->attach_type) {
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
-		ptype = BPF_PROG_TYPE_CGROUP_SKB;
+		cgrp = cgroup_get_from_fd(attr->target_fd);
+		if (IS_ERR(cgrp))
+			return PTR_ERR(cgrp);
+
+		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
+		cgroup_put(cgrp);
 		break;
+
 	default:
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		prog = NULL;
-
-	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
-	if (prog)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
 	return ret;
 }
 #endif /* CONFIG_CGROUP_BPF */
@@ -2020,9 +2020,6 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	if (ret)
 		goto destroy_root;
 
-	ret = cgroup_bpf_inherit(root_cgrp);
-	WARN_ON_ONCE(ret);
-
 	trace_cgroup_setup_root(root);
 
 	/*
@@ -5386,9 +5383,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 	cgrp->level = level;
-	ret = cgroup_bpf_inherit(cgrp);
-	if (ret)
-		goto out_idr_free;
 
 	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
 		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
@@ -5425,6 +5419,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 		goto out_idr_free;
 	}
 
+	if (parent)
+		cgroup_bpf_inherit(cgrp, parent);
+
 	cgroup_propagate_control(cgrp);
 
 	return cgrp;
@@ -6647,23 +6644,14 @@ static __init int cgroup_namespaces_init(void)
 subsys_initcall(cgroup_namespaces_init);
 
 #ifdef CONFIG_CGROUP_BPF
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable)
 {
+	struct cgroup *parent = cgroup_parent(cgrp);
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
-	mutex_unlock(&cgroup_mutex);
-	return ret;
-}
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
-{
-	int ret;
-
-	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }