Skip to content

Commit a74566a

Browse files
q2ven authored and Kernel Patches Daemon committed
bpf: Introduce SK_BPF_MEMCG_FLAGS and SK_BPF_MEMCG_SOCK_ISOLATED.
We will decouple sockets from the global protocol memory accounting if they have SK_BPF_MEMCG_SOCK_ISOLATED set. The flag can be set (and cleared) at the BPF_CGROUP_INET_SOCK_CREATE hook with bpf_setsockopt() and is inherited by child sockets. For example: u32 flags = SK_BPF_MEMCG_SOCK_ISOLATED; bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_MEMCG_FLAGS, &flags, sizeof(flags)); SK_BPF_MEMCG_FLAGS is supported only at BPF_CGROUP_INET_SOCK_CREATE and not on other hooks, for the following reasons: 1. UDP charges memory under sk->sk_receive_queue.lock instead of lock_sock(). 2. For TCP child sockets, memory accounting is adjusted only in __inet_accept(), to which the sk->sk_memcg allocation is deferred. 3. Modifying the flag after an skb has been charged to sk would require the same adjustment during bpf_setsockopt() and would complicate the logic unnecessarily. We can support other hooks later if a real use case justifies it. Since sk->sk_memcg can be accessed in the fast path, it would be preferable to place the flag field in the same cache line as sk->sk_memcg. However, struct sock does not have a 1-byte hole there. Let's store the flag in the lowest bit of sk->sk_memcg and add a helper to check the bit. In the next patch, if mem_cgroup_sk_isolated() returns true, the socket will not be charged to sk->sk_prot->memory_allocated. Signed-off-by: Kuniyuki Iwashima <[email protected]>
1 parent 93d31ca commit a74566a

File tree

6 files changed

+101
-0
lines changed

6 files changed

+101
-0
lines changed

include/net/sock.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2596,17 +2596,53 @@ static inline gfp_t gfp_memcg_charge(void)
25962596
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
25972597
}
25982598

2599+
/*
 * sk->sk_memcg doubles as a tagged pointer: the low bits selected by
 * SK_BPF_MEMCG_FLAG_MASK carry SK_BPF_MEMCG_* flags and the remaining bits
 * (SK_BPF_MEMCG_PTR_MASK) hold the struct mem_cgroup pointer.
 *
 * The pointer mask is fully parenthesized and computed in unsigned long so
 * it does not rely on sign extension of an int when applied to a
 * pointer-sized value.
 */
#define SK_BPF_MEMCG_FLAG_MASK	(SK_BPF_MEMCG_FLAG_MAX - 1)
#define SK_BPF_MEMCG_PTR_MASK	(~(unsigned long)SK_BPF_MEMCG_FLAG_MASK)
2601+
25992602
#ifdef CONFIG_MEMCG
26002603
static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
26012604
{
2605+
#ifdef CONFIG_CGROUP_BPF
2606+
unsigned long val = (unsigned long)sk->sk_memcg;
2607+
2608+
val &= SK_BPF_MEMCG_PTR_MASK;
2609+
return (struct mem_cgroup *)val;
2610+
#else
26022611
return sk->sk_memcg;
2612+
#endif
2613+
}
2614+
2615+
/*
 * mem_cgroup_sk_set_flags - replace the flag bits stored in sk->sk_memcg
 * @sk: socket whose sk_memcg word is updated
 * @flags: SK_BPF_MEMCG_* bits to store; bits outside
 *         SK_BPF_MEMCG_FLAG_MASK are ignored
 *
 * The pointer part of sk->sk_memcg is preserved, and any flags stored
 * previously are discarded because the word is rebuilt from the masked
 * pointer. Does nothing unless CONFIG_CGROUP_BPF is enabled.
 */
static inline void mem_cgroup_sk_set_flags(struct sock *sk, unsigned short flags)
{
#ifdef CONFIG_CGROUP_BPF
	unsigned long val = (unsigned long)mem_cgroup_from_sk(sk);

	/* Never let stray bits spill into the pointer part of the word. */
	val |= flags & SK_BPF_MEMCG_FLAG_MASK;
	sk->sk_memcg = (struct mem_cgroup *)val;
#endif
}
2624+
2625+
/*
 * mem_cgroup_sk_get_flags - read the flag bits stored in sk->sk_memcg
 * @sk: socket to inspect
 *
 * Returns the bits of sk->sk_memcg selected by SK_BPF_MEMCG_FLAG_MASK, or
 * 0 when CONFIG_CGROUP_BPF is disabled.
 */
static inline unsigned short mem_cgroup_sk_get_flags(const struct sock *sk)
{
	unsigned short flags = 0;

#ifdef CONFIG_CGROUP_BPF
	flags = (unsigned long)sk->sk_memcg & SK_BPF_MEMCG_FLAG_MASK;
#endif
	return flags;
}
26042635

26052636
static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
26062637
{
26072638
return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
26082639
}
26092640

2641+
static inline bool mem_cgroup_sk_isolated(const struct sock *sk)
2642+
{
2643+
return mem_cgroup_sk_get_flags(sk) & SK_BPF_MEMCG_SOCK_ISOLATED;
2644+
}
2645+
26102646
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
26112647
{
26122648
struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
@@ -2629,11 +2665,25 @@ static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
26292665
return NULL;
26302666
}
26312667

2668+
/*
 * Stub variant of mem_cgroup_sk_set_flags(): @flags are dropped.
 * NOTE(review): presumably the !CONFIG_MEMCG build, where there is no
 * memcg word to tag - confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_sk_set_flags(struct sock *sk, unsigned short flags)
{
	/* Intentionally empty. */
}
2671+
2672+
/*
 * Stub variant of mem_cgroup_sk_get_flags(): no flags are ever reported.
 * NOTE(review): presumably the !CONFIG_MEMCG build - confirm against the
 * enclosing #ifdef.
 */
static inline unsigned short mem_cgroup_sk_get_flags(const struct sock *sk)
{
	return 0;
}
2676+
26322677
static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
26332678
{
26342679
return false;
26352680
}
26362681

2682+
static inline bool mem_cgroup_sk_isolated(const struct sock *sk)
2683+
{
2684+
return false;
2685+
}
2686+
26372687
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
26382688
{
26392689
return false;

include/uapi/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7182,6 +7182,7 @@ enum {
71827182
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
71837183
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
71847184
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
7185+
SK_BPF_MEMCG_FLAGS = 1010, /* Get or Set flags saved in sk->sk_memcg */
71857186
};
71867187

71877188
enum {
@@ -7204,6 +7205,11 @@ enum {
72047205
*/
72057206
};
72067207

7208+
/* Flag values accepted by the SK_BPF_MEMCG_FLAGS socket option. */
enum {
	/* Decouple the socket from the global protocol memory accounting. */
	SK_BPF_MEMCG_SOCK_ISOLATED	= (1UL << 0),
	/* Exclusive upper bound for valid flag values; not a flag itself. */
	SK_BPF_MEMCG_FLAG_MAX		= (1UL << 1),
};
7212+
72077213
struct bpf_perf_event_value {
72087214
__u64 counter;
72097215
__u64 enabled;

net/core/filter.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5723,9 +5723,39 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
57235723
.arg5_type = ARG_CONST_SIZE,
57245724
};
57255725

5726+
static int sk_bpf_set_get_memcg_flags(struct sock *sk,
5727+
char *optval, int optlen,
5728+
bool getopt)
5729+
{
5730+
u32 flags;
5731+
5732+
if (optlen != sizeof(u32))
5733+
return -EINVAL;
5734+
5735+
if (!sk_has_account(sk))
5736+
return -EOPNOTSUPP;
5737+
5738+
if (getopt) {
5739+
*(u32 *)optval = mem_cgroup_sk_get_flags(sk);
5740+
return 0;
5741+
}
5742+
5743+
flags = *(u32 *)optval;
5744+
if (flags >= SK_BPF_MEMCG_FLAG_MAX)
5745+
return -EINVAL;
5746+
5747+
mem_cgroup_sk_set_flags(sk, flags);
5748+
5749+
return 0;
5750+
}
5751+
57265752
/*
 * setsockopt helper body for bpf_sock_create_setsockopt.
 *
 * SOL_SOCKET/SK_BPF_MEMCG_FLAGS is handled locally when CONFIG_MEMCG is
 * enabled; every other request is forwarded to __bpf_setsockopt().
 */
BPF_CALL_5(bpf_sock_create_setsockopt, struct sock *, sk, int, level,
	   int, optname, char *, optval, int, optlen)
{
	if (!IS_ENABLED(CONFIG_MEMCG) ||
	    level != SOL_SOCKET || optname != SK_BPF_MEMCG_FLAGS)
		return __bpf_setsockopt(sk, level, optname, optval, optlen);

	return sk_bpf_set_get_memcg_flags(sk, optval, optlen, false);
}
57315761

@@ -5743,6 +5773,10 @@ static const struct bpf_func_proto bpf_sock_create_setsockopt_proto = {
57435773
/*
 * getsockopt helper body for bpf_sock_create_getsockopt.
 *
 * SOL_SOCKET/SK_BPF_MEMCG_FLAGS is handled locally when CONFIG_MEMCG is
 * enabled; every other request is forwarded to __bpf_getsockopt().
 */
BPF_CALL_5(bpf_sock_create_getsockopt, struct sock *, sk, int, level,
	   int, optname, char *, optval, int, optlen)
{
	if (IS_ENABLED(CONFIG_MEMCG) &&
	    level == SOL_SOCKET && optname == SK_BPF_MEMCG_FLAGS) {
		int err = sk_bpf_set_get_memcg_flags(sk, optval, optlen, true);

		/*
		 * sk_bpf_set_get_memcg_flags() leaves @optval untouched on
		 * failure. Zero it so the caller never observes stale bytes.
		 * NOTE(review): assumes @optval may be uninitialised helper
		 * memory and that @optlen is a verifier-checked positive
		 * size, as for __bpf_getsockopt() - confirm against the
		 * func proto, which is not visible here.
		 */
		if (err)
			memset(optval, 0, optlen);

		return err;
	}

	return __bpf_getsockopt(sk, level, optname, optval, optlen);
}
57485782

net/core/sock.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2515,6 +2515,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
25152515
#ifdef CONFIG_MEMCG
25162516
/* sk->sk_memcg will be populated at accept() time */
25172517
newsk->sk_memcg = NULL;
2518+
mem_cgroup_sk_set_flags(newsk, mem_cgroup_sk_get_flags(sk));
25182519
#endif
25192520

25202521
cgroup_sk_clone(&newsk->sk_cgrp_data);

net/ipv4/af_inet.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,12 +758,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
758758
(!IS_ENABLED(CONFIG_IP_SCTP) ||
759759
sk_is_tcp(newsk) || sk_is_mptcp(newsk))) {
760760
gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
761+
unsigned short flags;
761762

763+
flags = mem_cgroup_sk_get_flags(newsk);
762764
mem_cgroup_sk_alloc(newsk);
763765

764766
if (mem_cgroup_from_sk(newsk)) {
765767
int amt;
766768

769+
mem_cgroup_sk_set_flags(newsk, flags);
770+
767771
/* The socket has not been accepted yet, no need
768772
* to look at newsk->sk_wmem_queued.
769773
*/

tools/include/uapi/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7182,6 +7182,7 @@ enum {
71827182
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
71837183
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
71847184
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
7185+
SK_BPF_MEMCG_FLAGS = 1010, /* Get or Set flags saved in sk->sk_memcg */
71857186
};
71867187

71877188
enum {
@@ -7204,6 +7205,11 @@ enum {
72047205
*/
72057206
};
72067207

7208+
/* Flag values accepted by the SK_BPF_MEMCG_FLAGS socket option. */
enum {
	/* Decouple the socket from the global protocol memory accounting. */
	SK_BPF_MEMCG_SOCK_ISOLATED	= (1UL << 0),
	/* Exclusive upper bound for valid flag values; not a flag itself. */
	SK_BPF_MEMCG_FLAG_MAX		= (1UL << 1),
};
7212+
72077213
struct bpf_perf_event_value {
72087214
__u64 counter;
72097215
__u64 enabled;

0 commit comments

Comments
 (0)