
Commit e9a373c

q2ven authored and Kernel Patches Daemon committed
selftest: bpf: Add test for SK_MEMCG_EXCLUSIVE.
The test does the following for IPv4/IPv6 x TCP/UDP sockets
with/without SK_MEMCG_EXCLUSIVE, which can be turned on by
net.core.memcg_exclusive or bpf_setsockopt(SK_BPF_MEMCG_EXCLUSIVE):

  1. Create socket pairs
  2. Send NR_PAGES (32) of data (TCP consumes around 35 pages,
     and UDP consumes 66 pages due to skb overhead)
  3. Read memory_allocated from sk->sk_prot->memory_allocated
     and sk->sk_prot->memory_per_cpu_fw_alloc
  4. Check if unread data is charged to memory_allocated

If SK_MEMCG_EXCLUSIVE is set, memory_allocated should not change, but
we allow a small error (up to 10 pages) in case other processes on the
host use some amount of TCP/UDP memory.

The number of allocated pages is buffered in the per-cpu variable
{tcp,udp}_memory_per_cpu_fw_alloc, up to +/- net.core.mem_pcpu_rsv,
before being reported to {tcp,udp}_memory_allocated.

At step 3, memory_allocated is calculated from the two variables at
the fentry of the socket create function.

We drain the receive queue only for UDP before close() because the
UDP receive queue is destroyed after an RCU grace period.

When I printed memory_allocated, the UDP exclusive cases sometimes saw
the non-exclusive case's leftover, but it was still within the small
error range (<10 pages):

  bpf_trace_printk: memory_allocated: 0   <-- TCP non-exclusive
  bpf_trace_printk: memory_allocated: 35
  bpf_trace_printk: memory_allocated: 0   <-- TCP w/ sysctl
  bpf_trace_printk: memory_allocated: 0
  bpf_trace_printk: memory_allocated: 0   <-- TCP w/ bpf
  bpf_trace_printk: memory_allocated: 0
  bpf_trace_printk: memory_allocated: 0   <-- UDP non-exclusive
  bpf_trace_printk: memory_allocated: 66
  bpf_trace_printk: memory_allocated: 2   <-- UDP w/ sysctl (2 pages leftover)
  bpf_trace_printk: memory_allocated: 2
  bpf_trace_printk: memory_allocated: 2   <-- UDP w/ bpf (2 pages leftover)
  bpf_trace_printk: memory_allocated: 2

We prefer finishing the test faster over sleeping for call_rcu() +
sk_destruct(). The test completes within 2s on QEMU (64 CPUs) w/ KVM:

  # time ./test_progs -t sk_memcg
  #370/1   sk_memcg/TCP :OK
  #370/2   sk_memcg/UDP :OK
  #370/3   sk_memcg/TCPv6:OK
  #370/4   sk_memcg/UDPv6:OK
  #370     sk_memcg:OK
  Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED

  real    0m1.609s
  user    0m0.167s
  sys     0m0.461s

Signed-off-by: Kuniyuki Iwashima <[email protected]>
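
For reference, the value the test compares is the global counter plus
every CPU's forward-alloc buffer. A minimal sketch of that arithmetic
in kernel-style C (illustrative only, not the exact kernel code; "prot"
here stands for the tcp_prot/udp_prot instance):

  long effective;
  int cpu;

  /* global counter ... */
  effective = atomic_long_read(prot->memory_allocated);
  /* ... plus each CPU's +/- buffered forward-alloc */
  for_each_possible_cpu(cpu)
  	effective += *per_cpu_ptr(prot->memory_per_cpu_fw_alloc, cpu);

The BPF program below performs the same summation with
bpf_per_cpu_ptr() inside a bpf_loop() over nr_cpus.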
1 parent c2302e6 commit e9a373c

2 files changed (+387, -0)
Lines changed: 282 additions & 0 deletions
@@ -0,0 +1,282 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright 2025 Google LLC */

#include <test_progs.h>
#include "sk_memcg.skel.h"
#include "network_helpers.h"

#define NR_PAGES 32
#define NR_SOCKETS 2
#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
#define BUF_SINGLE 1024
#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
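
/* Each socket sends BUF_TOTAL = 32 * 4096 / 2 = 65536 bytes in
 * NR_SEND = 64 chunks of BUF_SINGLE = 1024 bytes, so the two sockets
 * together queue NR_PAGES of payload; skb overhead pushes the actual
 * charge to ~35 pages for TCP and ~66 for UDP.
 */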

struct test_case {
	char name[8];
	int family;
	int type;
	int (*create_sockets)(struct test_case *test_case, int sk[], int len);
	long (*get_memory_allocated)(struct test_case *test_case, struct sk_memcg *skel);
};

static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
{
	int server, i;

	server = start_server(test_case->family, test_case->type, NULL, 0, 0);
	ASSERT_GE(server, 0, "start_server_str");

	/* Keep for-loop so we can change NR_SOCKETS easily. */
	for (i = 0; i < len; i += 2) {
		sk[i] = connect_to_fd(server, 0);
		if (sk[i] < 0) {
			ASSERT_GE(sk[i], 0, "connect_to_fd");
			return sk[i];
		}

		sk[i + 1] = accept(server, NULL, NULL);
		if (sk[i + 1] < 0) {
			ASSERT_GE(sk[i + 1], 0, "accept");
			return sk[i + 1];
		}
	}

	close(server);

	return 0;
}
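
/* Build UDP pairs connected in both directions, and grow SO_RCVBUF to
 * BUF_TOTAL so the unread datagrams stay queued and charged instead of
 * being dropped.
 */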
static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
{
	int i, j, err, rcvbuf = BUF_TOTAL;

	/* Keep for-loop so we can change NR_SOCKETS easily. */
	for (i = 0; i < len; i += 2) {
		sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0);
		if (sk[i] < 0) {
			ASSERT_GE(sk[i], 0, "start_server");
			return sk[i];
		}

		sk[i + 1] = connect_to_fd(sk[i], 0);
		if (sk[i + 1] < 0) {
			ASSERT_GE(sk[i + 1], 0, "connect_to_fd");
			return sk[i + 1];
		}

		err = connect_fd_to_fd(sk[i], sk[i + 1], 0);
		if (err) {
			ASSERT_EQ(err, 0, "connect_fd_to_fd");
			return err;
		}

		for (j = 0; j < 2; j++) {
			err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
			if (err) {
				ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
				return err;
			}
		}
	}

	return 0;
}
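
/* Creating and closing a socket here fires the fentry programs in the
 * BPF counterpart, which snapshot the current memory_allocated into
 * the skeleton's BSS for the checks below.
 */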
static long get_memory_allocated(struct test_case *test_case,
				 bool *activated, long *memory_allocated)
{
	int sk;

	*activated = true;

	/* AF_INET and AF_INET6 share the same memory_allocated.
	 * tcp_init_sock() is called by AF_INET and AF_INET6,
	 * but udp_lib_init_sock() is inline.
	 */
	sk = socket(AF_INET, test_case->type, 0);
	if (!ASSERT_GE(sk, 0, "get_memory_allocated"))
		return -1;

	close(sk);

	return *memory_allocated;
}

static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_memcg *skel)
{
	return get_memory_allocated(test_case,
				    &skel->bss->tcp_activated,
				    &skel->bss->tcp_memory_allocated);
}

static long udp_get_memory_allocated(struct test_case *test_case, struct sk_memcg *skel)
{
	return get_memory_allocated(test_case,
				    &skel->bss->udp_activated,
				    &skel->bss->udp_memory_allocated);
}
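
/* Queue ~NR_PAGES of unread data, then check that memory_allocated
 * grew past the baseline (non-exclusive) or stayed within a 10-page
 * error budget for unrelated traffic on the host (exclusive).
 */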
static int check_exclusive(struct test_case *test_case,
			   struct sk_memcg *skel, bool exclusive)
{
	char buf[BUF_SINGLE] = {};
	long memory_allocated[2];
	int sk[NR_SOCKETS] = {};
	int err, i, j;

	err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
	if (err)
		goto close;

	memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);

	/* allocate pages >= NR_PAGES */
	for (i = 0; i < ARRAY_SIZE(sk); i++) {
		for (j = 0; j < NR_SEND; j++) {
			int bytes = send(sk[i], buf, sizeof(buf), 0);

			/* Avoid too noisy logs when something failed. */
			if (bytes != sizeof(buf)) {
				ASSERT_EQ(bytes, sizeof(buf), "send");
				if (bytes < 0) {
					err = bytes;
					goto drain;
				}
			}
		}
	}

	memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);

	if (exclusive)
		ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "exclusive");
	else
		ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "not exclusive");

drain:
	if (test_case->type == SOCK_DGRAM) {
		/* UDP starts purging sk->sk_receive_queue after one RCU
		 * grace period, then udp_memory_allocated goes down,
		 * so drain the queue before close().
		 */
		for (i = 0; i < ARRAY_SIZE(sk); i++) {
			for (j = 0; j < NR_SEND; j++) {
				int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);

				if (bytes == sizeof(buf))
					continue;
				if (bytes != -1 || errno != EAGAIN)
					PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));
				break;
			}
		}
	}

close:
	for (i = 0; i < ARRAY_SIZE(sk); i++)
		close(sk[i]);

	return err;
}
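
/* Exercise check_exclusive() three ways: baseline (not exclusive),
 * exclusive via the net.core.memcg_exclusive sysctl, and exclusive via
 * the cgroup sock_create program calling bpf_setsockopt().
 */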
static void run_test(struct test_case *test_case)
{
	struct nstoken *nstoken;
	struct sk_memcg *skel;
	int cgroup, err;

	skel = sk_memcg__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		return;

	skel->bss->nr_cpus = libbpf_num_possible_cpus();

	err = sk_memcg__attach(skel);
	if (!ASSERT_OK(err, "attach"))
		goto destroy_skel;

	cgroup = test__join_cgroup("/sk_memcg");
	if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
		goto destroy_skel;

	err = make_netns("sk_memcg");
	if (!ASSERT_EQ(err, 0, "make_netns"))
		goto close_cgroup;

	nstoken = open_netns("sk_memcg");
	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
		goto remove_netns;

	err = check_exclusive(test_case, skel, false);
	if (!ASSERT_EQ(err, 0, "test_exclusive(false)"))
		goto close_netns;

	err = write_sysctl("/proc/sys/net/core/memcg_exclusive", "1");
	if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
		goto close_netns;

	err = check_exclusive(test_case, skel, true);
	if (!ASSERT_EQ(err, 0, "test_exclusive(true by sysctl)"))
		goto close_netns;

	err = write_sysctl("/proc/sys/net/core/memcg_exclusive", "0");
	if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
		goto close_netns;

	skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
	if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
		goto close_netns;

	err = check_exclusive(test_case, skel, true);
	ASSERT_EQ(err, 0, "test_exclusive(true by bpf)");

close_netns:
	close_netns(nstoken);
remove_netns:
	remove_netns("sk_memcg");
close_cgroup:
	close(cgroup);
destroy_skel:
	sk_memcg__destroy(skel);
}
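
/* Subtest matrix: {TCP, UDP} x {IPv4, IPv6}. */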
struct test_case test_cases[] = {
	{
		.name = "TCP ",
		.family = AF_INET,
		.type = SOCK_STREAM,
		.create_sockets = tcp_create_sockets,
		.get_memory_allocated = tcp_get_memory_allocated,
	},
	{
		.name = "UDP ",
		.family = AF_INET,
		.type = SOCK_DGRAM,
		.create_sockets = udp_create_sockets,
		.get_memory_allocated = udp_get_memory_allocated,
	},
	{
		.name = "TCPv6",
		.family = AF_INET6,
		.type = SOCK_STREAM,
		.create_sockets = tcp_create_sockets,
		.get_memory_allocated = tcp_get_memory_allocated,
	},
	{
		.name = "UDPv6",
		.family = AF_INET6,
		.type = SOCK_DGRAM,
		.create_sockets = udp_create_sockets,
		.get_memory_allocated = udp_get_memory_allocated,
	},
};

void serial_test_sk_memcg(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
		if (test__start_subtest(test_cases[i].name))
			run_test(&test_cases[i]);
	}
}
Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright 2025 Google LLC */

#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <errno.h>

extern int tcp_memory_per_cpu_fw_alloc __ksym;
extern int udp_memory_per_cpu_fw_alloc __ksym;

int nr_cpus;
bool tcp_activated, udp_activated;
long tcp_memory_allocated, udp_memory_allocated;

struct sk_prot {
	long *memory_allocated;
	int *memory_per_cpu_fw_alloc;
};
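
/* bpf_loop() callback: fold one CPU's forward-alloc buffer into the
 * running total.
 */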
static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
{
	int *memory_per_cpu_fw_alloc;

	memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i);
	if (memory_per_cpu_fw_alloc)
		*sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc;

	return 0;
}

static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
{
	struct sock *sk = bpf_core_cast(_sk, struct sock);
	struct sk_prot sk_prot_ctx;
	long memory_allocated;

	/* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
	memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;

	sk_prot_ctx.memory_allocated = &memory_allocated;
	sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;

	bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);

	return memory_allocated;
}
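
/* Snapshot memory_allocated once per round; userspace re-arms
 * *activated before each read in its own get_memory_allocated().
 */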
static void fentry_init_sock(struct sock *sk, bool *activated,
			     long *memory_allocated, int *memory_per_cpu_fw_alloc)
{
	if (!*activated)
		return;

	*memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
	*activated = false;
}

SEC("fentry/tcp_init_sock")
int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
{
	fentry_init_sock(sk, &tcp_activated,
			 &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
	return 0;
}

SEC("fentry/udp_init_sock")
int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
{
	fentry_init_sock(sk, &udp_activated,
			 &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
	return 0;
}
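
/* Set SK_BPF_MEMCG_EXCLUSIVE on every socket created in the test
 * cgroup, then read it back to verify the flag stuck; errors are
 * surfaced to userspace via bpf_set_retval().
 */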
SEC("cgroup/sock_create")
76+
int sock_create(struct bpf_sock *ctx)
77+
{
78+
u32 flags = SK_BPF_MEMCG_EXCLUSIVE;
79+
int err;
80+
81+
err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_MEMCG_FLAGS,
82+
&flags, sizeof(flags));
83+
if (err)
84+
goto err;
85+
86+
flags = 0;
87+
88+
err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_MEMCG_FLAGS,
89+
&flags, sizeof(flags));
90+
if (err)
91+
goto err;
92+
93+
if (flags != SK_BPF_MEMCG_EXCLUSIVE) {
94+
err = -EINVAL;
95+
goto err;
96+
}
97+
98+
return 1;
99+
100+
err:
101+
bpf_set_retval(err);
102+
return 0;
103+
}
104+
105+
char LICENSE[] SEC("license") = "GPL";
