bpf: add napi_id read access to __sk_buff
Add napi_id access to __sk_buff for socket filter program types, tc program types and other bpf_convert_ctx_access() users. Having access to skb->napi_id is useful for per RX queue listener siloing, f.e. in combination with SO_ATTACH_REUSEPORT_EBPF and when busy polling is used, meaning SO_REUSEPORT enabled listeners can then select the corresponding socket at SYN time already [1]. The skb is marked via skb_mark_napi_id() early in the receive path (e.g., napi_gro_receive()). Currently, sockets can only use SO_INCOMING_NAPI_ID from 6d4339028b35 ("net: Introduce SO_INCOMING_NAPI_ID") as a socket option to look up the NAPI ID associated with the queue for steering, which requires a prior sk_mark_napi_id() after the socket was looked up. Semantics for the __sk_buff napi_id access are similar, meaning if skb->napi_id is < MIN_NAPI_ID (e.g. outgoing packets using sender_cpu), then an invalid napi_id of 0 is returned to the program, otherwise a valid non-zero napi_id. [1] http://netdevconf.org/2.1/slides/apr6/dumazet-BUSY-POLLING-Netdev-2.1.pdf Suggested-by: Eric Dumazet <edumazet@google.com> Change-Id: I41744d03eb67dcef27b412f4dedba1b9c78c1d17 Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent fa05c6f944
commit 63ad3e9d66

3 changed files with 18 additions and 0 deletions
@@ -715,6 +715,7 @@ struct __sk_buff {
 	__u32 tc_classid;
 	__u32 data;
 	__u32 data_end;
+	__u32 napi_id;
 };
 
 struct bpf_tunnel_key {
|
@@ -52,6 +52,7 @@
 #include <net/dst_metadata.h>
 #include <net/dst.h>
 #include <net/sock_reuseport.h>
+#include <net/busy_poll.h>
 #include <net/tcp.h>
 #include <linux/bpf_trace.h>
|
@@ -3675,6 +3676,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
 		else
 			*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
 #endif
 		break;
 
+	case offsetof(struct __sk_buff, napi_id):
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, napi_id));
+		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
+		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#else
+		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#endif
+		break;
 	}
|
@@ -742,6 +742,9 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, vlan_tci)),
 			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
Loading…
Add table
Add a link
Reference in a new issue