1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /*
4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5 * between src and dst. The netns fwd has veth links to each src and dst. The
6 * client is in src and server in dst. The test installs a TC BPF program to each
7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9 * switch from ingress side; it also installs a checker prog on the egress side
10 * to drop unexpected traffic.
11 */
12
13 #include <arpa/inet.h>
14 #include <linux/if.h>
15 #include <linux/if_tun.h>
16 #include <linux/limits.h>
17 #include <linux/sysctl.h>
18 #include <linux/time_types.h>
19 #include <linux/net_tstamp.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "test_tc_neigh_fib.skel.h"
28 #include "test_tc_neigh.skel.h"
29 #include "test_tc_peer.skel.h"
30 #include "test_tc_dtime.skel.h"
31
/* Fallback for build environments whose headers do not define TCP_TX_DELAY. */
#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37
#endif

/* Names of the three network namespaces created for every subtest. */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/* IPv4 addresses: veth_src / veth_dst endpoints and the tun_src / tun_fwd
 * devices used by the L3 peer test, plus the TCP port used by test_tcp().
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses and port above. */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/* IPv4 link-local addresses given to veth_src_fwd (SLL) and veth_dst_fwd
 * (DLL) in the fwd namespace; IP4_NET is the matching /16 routed from the
 * src and dst namespaces.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses assigned to veth_dst_fwd and veth_dst. */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"

/* Number of bytes get_ifaddr() reads from the sysfs "address" file. */
#define IFADDR_STR_LEN 18
/* Options passed to the ping command by test_ping(). */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* bpffs paths where the tc programs are pinned before "tc filter add". */
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"

/* Timeout handed to connect_to_fd() and settimeo(). */
#define TIMEOUT_MILLIS 10000
#define NSEC_PER_SEC 1000000000ULL
68
/*
 * Print a printf-style message to stderr, prefixed with the source file,
 * the line number and the strerror() text of the current errno. A newline
 * is appended automatically.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
72
/* NULL-terminated list of the namespaces handled by netns_setup_namespaces*(). */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
74
/*
 * Write the string @newval into the already existing file @path (opened
 * with "r+", so the file is never created).
 *
 * Returns 0 on success, -1 if the file cannot be opened or if the data
 * cannot be written out completely.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f) {
		log_err("opening %s failed", path);
		return -1;
	}
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	/* stdio buffers the data, so a write rejected by the kernel may only
	 * surface when fclose() flushes the stream.
	 */
	if (fclose(f)) {
		log_err("closing %s failed", path);
		return -1;
	}
	return 0;
}
90
netns_setup_namespaces(const char * verb)91 static int netns_setup_namespaces(const char *verb)
92 {
93 const char * const *ns = namespaces;
94 char cmd[128];
95
96 while (*ns) {
97 snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
98 if (!ASSERT_OK(system(cmd), cmd))
99 return -1;
100 ns++;
101 }
102 return 0;
103 }
104
netns_setup_namespaces_nofail(const char * verb)105 static void netns_setup_namespaces_nofail(const char *verb)
106 {
107 const char * const *ns = namespaces;
108 char cmd[128];
109
110 while (*ns) {
111 snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
112 system(cmd);
113 ns++;
114 }
115 }
116
/*
 * Values produced by netns_setup_links_and_routes() and consumed by the
 * individual subtests.
 */
struct netns_setup_result {
	/* ifindex of veth_src_fwd as reported by get_ifindex() */
	int ifindex_veth_src_fwd;
	/* ifindex of veth_dst_fwd as reported by get_ifindex() */
	int ifindex_veth_dst_fwd;
};
121
get_ifaddr(const char * name,char * ifaddr)122 static int get_ifaddr(const char *name, char *ifaddr)
123 {
124 char path[PATH_MAX];
125 FILE *f;
126 int ret;
127
128 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
129 f = fopen(path, "r");
130 if (!ASSERT_OK_PTR(f, path))
131 return -1;
132
133 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
134 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
135 fclose(f);
136 return -1;
137 }
138 fclose(f);
139 return 0;
140 }
141
get_ifindex(const char * name)142 static int get_ifindex(const char *name)
143 {
144 char path[PATH_MAX];
145 char buf[32];
146 FILE *f;
147 int ret;
148
149 snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
150 f = fopen(path, "r");
151 if (!ASSERT_OK_PTR(f, path))
152 return -1;
153
154 ret = fread(buf, 1, sizeof(buf), f);
155 if (!ASSERT_GT(ret, 0, "fread ifindex")) {
156 fclose(f);
157 return -1;
158 }
159 fclose(f);
160 return atoi(buf);
161 }
162
/*
 * Format a shell command (printf-style, at most 1024 bytes including the
 * terminator) and run it with system(). If the command does not return 0
 * the assertion fails and control jumps to the label "fail", which must
 * therefore exist in every function that uses this macro.
 */
#define SYS(fmt, ...)						\
	({							\
		char cmd[1024];					\
		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
		if (!ASSERT_OK(system(cmd), cmd))		\
			goto fail;				\
	})
170
/*
 * Build the src <-> fwd <-> dst topology: create the two veth pairs, move
 * each end into its namespace and configure addresses, routes and static
 * neighbour entries in all three namespaces.
 *
 * @result receives the ifindexes of veth_src_fwd and veth_dst_fwd.
 *
 * Returns 0 on success, -1 as soon as any step fails (the currently
 * entered namespace, if any, is left again before returning).
 */
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
	struct nstoken *nstoken = NULL;
	/* one extra, zero-initialized byte keeps the address NUL-terminated */
	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};

	SYS("ip link add veth_src type veth peer name veth_src_fwd");
	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");

	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
	SYS("ip link set veth_dst address " MAC_DST);

	/* MAC address and ifindexes are read while the devices are still in
	 * the namespace the test was started in, i.e. before they are moved.
	 */
	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
		goto fail;

	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
	if (result->ifindex_veth_src_fwd < 0)
		goto fail;
	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
	if (result->ifindex_veth_dst_fwd < 0)
		goto fail;

	SYS("ip link set veth_src netns " NS_SRC);
	SYS("ip link set veth_src_fwd netns " NS_FWD);
	SYS("ip link set veth_dst_fwd netns " NS_FWD);
	SYS("ip link set veth_dst netns " NS_DST);

	/** setup in 'src' namespace */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns src"))
		goto fail;

	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
	SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
	SYS("ip link set dev veth_src up");

	SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
	SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
	SYS("ip route add " IP6_DST "/128 dev veth_src scope global");

	/* static neighbour entries: the dst addresses resolve to the MAC of
	 * veth_src_fwd
	 */
	SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
	    veth_src_fwd_addr);
	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
	    veth_src_fwd_addr);

	close_netns(nstoken);

	/** setup in 'fwd' namespace */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto fail;

	/* The fwd netns automatically gets a v6 LL address / routes, but also
	 * needs v4 one in order to start ARP probing. IP4_NET route is added
	 * to the endpoints so that the ARP processing will reply.
	 */
	SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
	SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
	SYS("ip link set dev veth_src_fwd up");
	SYS("ip link set dev veth_dst_fwd up");

	SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
	SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");

	close_netns(nstoken);

	/** setup in 'dst' namespace */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
		goto fail;

	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
	SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
	SYS("ip link set dev veth_dst up");

	SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");

	/* static neighbour entries: the src addresses resolve to the MAC
	 * assigned to veth_dst_fwd above
	 */
	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);

	close_netns(nstoken);

	return 0;
fail:
	if (nstoken)
		close_netns(nstoken);
	return -1;
}
262
netns_load_bpf(void)263 static int netns_load_bpf(void)
264 {
265 SYS("tc qdisc add dev veth_src_fwd clsact");
266 SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
267 SRC_PROG_PIN_FILE);
268 SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
269 CHK_PROG_PIN_FILE);
270
271 SYS("tc qdisc add dev veth_dst_fwd clsact");
272 SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
273 DST_PROG_PIN_FILE);
274 SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
275 CHK_PROG_PIN_FILE);
276
277 return 0;
278 fail:
279 return -1;
280 }
281
/*
 * TCP connectivity check: start a server on @addr:@port inside NS_DST,
 * connect to it from NS_SRC, send one fixed payload from the client and
 * expect to read the same number of bytes on the accepted socket.
 * Failures are reported through the ASSERT_*() macros.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, conn_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto cleanup;

	/* switch from the server's namespace to the client's one */
	close_netns(tok);
	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto cleanup;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto cleanup;

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto cleanup;

	if (!ASSERT_OK(settimeo(conn_fd, TIMEOUT_MILLIS), "settimeo"))
		goto cleanup;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto cleanup;

	nbytes = read(conn_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

cleanup:
	if (tok)
		close_netns(tok);
	if (srv_fd >= 0)
		close(srv_fd);
	if (conn_fd >= 0)
		close(conn_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
330
test_ping(int family,const char * addr)331 static int test_ping(int family, const char *addr)
332 {
333 SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
334 return 0;
335 fail:
336 return -1;
337 }
338
test_connectivity(void)339 static void test_connectivity(void)
340 {
341 test_tcp(AF_INET, IP4_DST, IP4_PORT);
342 test_ping(AF_INET, IP4_DST);
343 test_tcp(AF_INET6, IP6_DST, IP6_PORT);
344 test_ping(AF_INET6, IP6_DST);
345 }
346
/*
 * Enable or disable IPv4 and IPv6 forwarding in the network namespace the
 * caller is currently in, by writing "1" or "0" to the two sysctl files.
 *
 * Returns 0 on success, or the error returned by write_file() for the
 * first file that could not be written.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* the assertion tag names the value that was actually written */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
361
/*
 * Receive one message from @fd and validate both its payload and its
 * receive timestamp.
 *
 * @expected: data the message is compared against
 * @s:        expected message length in bytes (also the compare length)
 *
 * The timestamp is taken from an SO_TIMESTAMPNS_NEW control message; it
 * must be non-zero, not later than the current CLOCK_REALTIME and less
 * than 5 seconds old.
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	/* stays all-zero if no matching control message is found */
	struct __kernel_timespec pkt_ts = {};
	char ctl[CMSG_SPACE(sizeof(pkt_ts))];
	struct timespec now_ts;
	struct msghdr msg = {};
	__u64 now_ns, pkt_ns;
	struct cmsghdr *cmsg;
	struct iovec iov;
	char data[32];
	int ret;

	iov.iov_base = data;
	iov.iov_len = sizeof(data);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = &ctl;
	msg.msg_controllen = sizeof(ctl);

	ret = recvmsg(fd, &msg, 0);
	if (!ASSERT_EQ(ret, s, "recvmsg"))
		return;
	/* NOTE(review): ASSERT_STRNEQ presumably checks that the first s bytes
	 * are equal (strncmp-style) — confirm in test_progs.h
	 */
	ASSERT_STRNEQ(data, expected, s, "expected rcv data");

	/* only the first control message is inspected */
	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
		memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));

	pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
	ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");

	ret = clock_gettime(CLOCK_REALTIME, &now_ts);
	ASSERT_OK(ret, "clock_gettime");
	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;

	/* the age check is only meaningful if the timestamp is not in the future */
	if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
		ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
			  "check rcv tstamp");
}
402
snd_tstamp(int fd,char * b,size_t s)403 static void snd_tstamp(int fd, char *b, size_t s)
404 {
405 struct sock_txtime opt = { .clockid = CLOCK_TAI };
406 char ctl[CMSG_SPACE(sizeof(__u64))];
407 struct timespec now_ts;
408 struct msghdr msg = {};
409 struct cmsghdr *cmsg;
410 struct iovec iov;
411 __u64 now_ns;
412 int ret;
413
414 ret = clock_gettime(CLOCK_TAI, &now_ts);
415 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
416 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
417
418 iov.iov_base = b;
419 iov.iov_len = s;
420 msg.msg_iov = &iov;
421 msg.msg_iovlen = 1;
422 msg.msg_control = &ctl;
423 msg.msg_controllen = sizeof(ctl);
424
425 cmsg = CMSG_FIRSTHDR(&msg);
426 cmsg->cmsg_level = SOL_SOCKET;
427 cmsg->cmsg_type = SCM_TXTIME;
428 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
429 *(__u64 *)CMSG_DATA(cmsg) = now_ns;
430
431 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
432 ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
433
434 ret = sendmsg(fd, &msg, 0);
435 ASSERT_EQ(ret, s, "sendmsg");
436 }
437
/*
 * Exchange one message between NS_SRC (client) and NS_DST (server) over a
 * socket of the given @family / @type and validate the receive timestamp
 * with rcv_tstamp().
 *
 * For SOCK_STREAM the payload is written on the connected client socket
 * and read from the accepted socket; for any other type it is sent with
 * snd_tstamp() and read directly from the server socket.
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int one = 1, conn_fd = -1, cli_fd = -1, srv_fd, rc;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;
	srv_fd = start_server(family, type, addr, port, 0);
	close_netns(tok);

	if (!ASSERT_GE(srv_fd, 0, "listen"))
		return;

	/* Ensure the kernel puts the (rcv) timestamp for all skb */
	rc = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
			&one, sizeof(one));
	if (!ASSERT_OK(rc, "setsockopt(SO_TIMESTAMPNS_NEW)"))
		goto out;

	if (type == SOCK_STREAM) {
		/* Ensure the kernel set EDT when sending out rst/ack
		 * from the kernel's ctl_sk.
		 */
		rc = setsockopt(srv_fd, SOL_TCP, TCP_TX_DELAY, &one,
				sizeof(one));
		if (!ASSERT_OK(rc, "setsockopt(TCP_TX_DELAY)"))
			goto out;
	}

	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto out;
	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	close_netns(tok);

	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto out;

	if (type != SOCK_STREAM) {
		/* datagram path: no accept(), the server socket receives */
		snd_tstamp(cli_fd, payload, sizeof(payload));
		rcv_tstamp(srv_fd, payload, sizeof(payload));
		goto out;
	}

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto out;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto out;
	rcv_tstamp(conn_fd, payload, sizeof(payload));

out:
	close(srv_fd);
	if (conn_fd != -1)
		close(conn_fd);
	if (cli_fd != -1)
		close(cli_fd);
}
501
netns_load_dtime_bpf(struct test_tc_dtime * skel)502 static int netns_load_dtime_bpf(struct test_tc_dtime *skel)
503 {
504 struct nstoken *nstoken;
505
506 #define PIN_FNAME(__file) "/sys/fs/bpf/" #__file
507 #define PIN(__prog) ({ \
508 int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \
509 if (!ASSERT_OK(err, "pin " #__prog)) \
510 goto fail; \
511 })
512
513 /* setup ns_src tc progs */
514 nstoken = open_netns(NS_SRC);
515 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
516 return -1;
517 PIN(egress_host);
518 PIN(ingress_host);
519 SYS("tc qdisc add dev veth_src clsact");
520 SYS("tc filter add dev veth_src ingress bpf da object-pinned "
521 PIN_FNAME(ingress_host));
522 SYS("tc filter add dev veth_src egress bpf da object-pinned "
523 PIN_FNAME(egress_host));
524 close_netns(nstoken);
525
526 /* setup ns_dst tc progs */
527 nstoken = open_netns(NS_DST);
528 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
529 return -1;
530 PIN(egress_host);
531 PIN(ingress_host);
532 SYS("tc qdisc add dev veth_dst clsact");
533 SYS("tc filter add dev veth_dst ingress bpf da object-pinned "
534 PIN_FNAME(ingress_host));
535 SYS("tc filter add dev veth_dst egress bpf da object-pinned "
536 PIN_FNAME(egress_host));
537 close_netns(nstoken);
538
539 /* setup ns_fwd tc progs */
540 nstoken = open_netns(NS_FWD);
541 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
542 return -1;
543 PIN(ingress_fwdns_prio100);
544 PIN(egress_fwdns_prio100);
545 PIN(ingress_fwdns_prio101);
546 PIN(egress_fwdns_prio101);
547 SYS("tc qdisc add dev veth_dst_fwd clsact");
548 SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned "
549 PIN_FNAME(ingress_fwdns_prio100));
550 SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned "
551 PIN_FNAME(ingress_fwdns_prio101));
552 SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned "
553 PIN_FNAME(egress_fwdns_prio100));
554 SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned "
555 PIN_FNAME(egress_fwdns_prio101));
556 SYS("tc qdisc add dev veth_src_fwd clsact");
557 SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned "
558 PIN_FNAME(ingress_fwdns_prio100));
559 SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned "
560 PIN_FNAME(ingress_fwdns_prio101));
561 SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned "
562 PIN_FNAME(egress_fwdns_prio100));
563 SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned "
564 PIN_FNAME(egress_fwdns_prio101));
565 close_netns(nstoken);
566
567 #undef PIN
568
569 return 0;
570
571 fail:
572 close_netns(nstoken);
573 return -1;
574 }
575
/*
 * Indexes into the per-test dtimes[] and errs[] counter arrays kept in the
 * test_tc_dtime skeleton's bss. __MAX_CNT is the number of counters.
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};
586
/* Printable counter names, in the order of the counter enum; used to build
 * assertion messages.
 */
const char *cnt_names[] = {
	"ingress_fwdns_p100",
	"ingress_fwdns_p101",
	"egress_fwdns_p100",
	"egress_fwdns_p101",
	"ingress_endhost",
	"egress_endhost",
	"set_dtime",
};
596
/*
 * Identifiers of the delivery-time test cases. The value is stored in
 * skel->bss->test, selects the row of dtimes[] / errs[] that is checked,
 * and is added to 50000 to derive the port of each test case.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};
610
/* Printable test-case names, in the order of the test enum; used to build
 * assertion messages. There is no entry for UKN_TEST.
 */
const char *test_names[] = {
	"tcp ip6 clear dtime",
	"tcp ip4",
	"tcp ip6",
	"udp ip4",
	"udp ip6",
	"tcp ip4 rt fwd",
	"tcp ip6 rt fwd",
	"udp ip4 rt fwd",
	"udp ip6 rt fwd",
};
622
dtime_cnt_str(int test,int cnt)623 static const char *dtime_cnt_str(int test, int cnt)
624 {
625 static char name[64];
626
627 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
628
629 return name;
630 }
631
dtime_err_str(int test,int cnt)632 static const char *dtime_err_str(int test, int cnt)
633 {
634 static char name[64];
635
636 snprintf(name, sizeof(name), "%s %s errs", test_names[test],
637 cnt_names[cnt]);
638
639 return name;
640 }
641
test_tcp_clear_dtime(struct test_tc_dtime * skel)642 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
643 {
644 int i, t = TCP_IP6_CLEAR_DTIME;
645 __u32 *dtimes = skel->bss->dtimes[t];
646 __u32 *errs = skel->bss->errs[t];
647
648 skel->bss->test = t;
649 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
650
651 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
652 dtime_cnt_str(t, INGRESS_FWDNS_P100));
653 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
654 dtime_cnt_str(t, INGRESS_FWDNS_P101));
655 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
656 dtime_cnt_str(t, EGRESS_FWDNS_P100));
657 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
658 dtime_cnt_str(t, EGRESS_FWDNS_P101));
659 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
660 dtime_cnt_str(t, EGRESS_ENDHOST));
661 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
662 dtime_cnt_str(t, INGRESS_ENDHOST));
663
664 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
665 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
666 }
667
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)668 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
669 {
670 __u32 *dtimes, *errs;
671 const char *addr;
672 int i, t;
673
674 if (family == AF_INET) {
675 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
676 addr = IP4_DST;
677 } else {
678 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
679 addr = IP6_DST;
680 }
681
682 dtimes = skel->bss->dtimes[t];
683 errs = skel->bss->errs[t];
684
685 skel->bss->test = t;
686 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
687
688 /* fwdns_prio100 prog does not read delivery_time_type, so
689 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
690 */
691 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
692 dtime_cnt_str(t, INGRESS_FWDNS_P100));
693 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
694 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
695
696 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
697 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
698 }
699
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)700 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
701 {
702 __u32 *dtimes, *errs;
703 const char *addr;
704 int i, t;
705
706 if (family == AF_INET) {
707 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
708 addr = IP4_DST;
709 } else {
710 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
711 addr = IP6_DST;
712 }
713
714 dtimes = skel->bss->dtimes[t];
715 errs = skel->bss->errs[t];
716
717 skel->bss->test = t;
718 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
719
720 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
721 dtime_cnt_str(t, INGRESS_FWDNS_P100));
722 /* non mono delivery time is not forwarded */
723 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
724 dtime_cnt_str(t, INGRESS_FWDNS_P101));
725 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
726 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
727
728 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
729 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
730 }
731
/*
 * "tc_redirect_dtime" subtest: load the test_tc_dtime skeleton, attach its
 * programs in all three namespaces and run the delivery-time cases twice:
 * first with forwarding disabled in NS_FWD, then with forwarding enabled
 * (the *_RT_FWD cases).
 *
 * @setup_result: ifindexes of the fwd-side veth devices, written into the
 *                skeleton's rodata before it is loaded.
 */
static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
{
	struct test_tc_dtime *skel;
	struct nstoken *nstoken;
	int err;

	skel = test_tc_dtime__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
		return;

	/* rodata is filled in between open and load */
	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;

	err = test_tc_dtime__load(skel);
	if (!ASSERT_OK(err, "test_tc_dtime__load"))
		goto done;

	if (netns_load_dtime_bpf(skel))
		goto done;

	/* set_forwarding() acts on the current namespace, so enter NS_FWD
	 * just for the sysctl writes
	 */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto done;
	err = set_forwarding(false);
	close_netns(nstoken);
	if (!ASSERT_OK(err, "disable forwarding"))
		goto done;

	test_tcp_clear_dtime(skel);

	test_tcp_dtime(skel, AF_INET, true);
	test_tcp_dtime(skel, AF_INET6, true);
	test_udp_dtime(skel, AF_INET, true);
	test_udp_dtime(skel, AF_INET6, true);

	/* Test the kernel ip[6]_forward path instead
	 * of bpf_redirect_neigh().
	 */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto done;
	err = set_forwarding(true);
	close_netns(nstoken);
	if (!ASSERT_OK(err, "enable forwarding"))
		goto done;

	test_tcp_dtime(skel, AF_INET, false);
	test_tcp_dtime(skel, AF_INET6, false);
	test_udp_dtime(skel, AF_INET, false);
	test_udp_dtime(skel, AF_INET6, false);

done:
	test_tc_dtime__destroy(skel);
}
786
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)787 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
788 {
789 struct nstoken *nstoken = NULL;
790 struct test_tc_neigh_fib *skel = NULL;
791 int err;
792
793 nstoken = open_netns(NS_FWD);
794 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
795 return;
796
797 skel = test_tc_neigh_fib__open();
798 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
799 goto done;
800
801 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
802 goto done;
803
804 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
805 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
806 goto done;
807
808 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
809 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
810 goto done;
811
812 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
813 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
814 goto done;
815
816 if (netns_load_bpf())
817 goto done;
818
819 /* bpf_fib_lookup() checks if forwarding is enabled */
820 if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
821 goto done;
822
823 test_connectivity();
824
825 done:
826 if (skel)
827 test_tc_neigh_fib__destroy(skel);
828 close_netns(nstoken);
829 }
830
test_tc_redirect_neigh(struct netns_setup_result * setup_result)831 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
832 {
833 struct nstoken *nstoken = NULL;
834 struct test_tc_neigh *skel = NULL;
835 int err;
836
837 nstoken = open_netns(NS_FWD);
838 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
839 return;
840
841 skel = test_tc_neigh__open();
842 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
843 goto done;
844
845 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
846 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
847
848 err = test_tc_neigh__load(skel);
849 if (!ASSERT_OK(err, "test_tc_neigh__load"))
850 goto done;
851
852 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
853 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
854 goto done;
855
856 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
857 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
858 goto done;
859
860 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
861 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
862 goto done;
863
864 if (netns_load_bpf())
865 goto done;
866
867 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
868 goto done;
869
870 test_connectivity();
871
872 done:
873 if (skel)
874 test_tc_neigh__destroy(skel);
875 close_netns(nstoken);
876 }
877
test_tc_redirect_peer(struct netns_setup_result * setup_result)878 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
879 {
880 struct nstoken *nstoken;
881 struct test_tc_peer *skel;
882 int err;
883
884 nstoken = open_netns(NS_FWD);
885 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
886 return;
887
888 skel = test_tc_peer__open();
889 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
890 goto done;
891
892 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
893 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
894
895 err = test_tc_peer__load(skel);
896 if (!ASSERT_OK(err, "test_tc_peer__load"))
897 goto done;
898
899 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
900 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
901 goto done;
902
903 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
904 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
905 goto done;
906
907 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
908 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
909 goto done;
910
911 if (netns_load_bpf())
912 goto done;
913
914 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
915 goto done;
916
917 test_connectivity();
918
919 done:
920 if (skel)
921 test_tc_peer__destroy(skel);
922 close_netns(nstoken);
923 }
924
tun_open(char * name)925 static int tun_open(char *name)
926 {
927 struct ifreq ifr;
928 int fd, err;
929
930 fd = open("/dev/net/tun", O_RDWR);
931 if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
932 return -1;
933
934 memset(&ifr, 0, sizeof(ifr));
935
936 ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
937 if (*name)
938 strncpy(ifr.ifr_name, name, IFNAMSIZ);
939
940 err = ioctl(fd, TUNSETIFF, &ifr);
941 if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
942 goto fail;
943
944 SYS("ip link set dev %s up", name);
945
946 return fd;
947 fail:
948 close(fd);
949 return -1;
950 }
951
/* Relay direction chosen by tun_relay_loop() for each packet. */
enum {
	SRC_TO_TARGET = 0,
	TARGET_TO_SRC = 1,
};
956
tun_relay_loop(int src_fd,int target_fd)957 static int tun_relay_loop(int src_fd, int target_fd)
958 {
959 fd_set rfds, wfds;
960
961 FD_ZERO(&rfds);
962 FD_ZERO(&wfds);
963
964 for (;;) {
965 char buf[1500];
966 int direction, nread, nwrite;
967
968 FD_SET(src_fd, &rfds);
969 FD_SET(target_fd, &rfds);
970
971 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
972 log_err("select failed");
973 return 1;
974 }
975
976 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
977
978 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
979 if (nread < 0) {
980 log_err("read failed");
981 return 1;
982 }
983
984 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
985 if (nwrite != nread) {
986 log_err("write failed");
987 return 1;
988 }
989 }
990 }
991
/*
 * "tc_redirect_peer_l3" subtest: connect NS_SRC and NS_FWD through a pair
 * of TUN devices whose packets are relayed by a forked child process,
 * attach the L3 variants of the test_tc_peer programs and run the
 * connectivity checks over that tunnel.
 *
 * @setup_result: provides the ifindex of veth_dst_fwd for the skeleton's
 *                rodata; IFINDEX_SRC is set to the ifindex of tun_fwd.
 */
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
{
	struct test_tc_peer *skel = NULL;
	struct nstoken *nstoken = NULL;
	int err;
	int tunnel_pid = -1;
	int src_fd, target_fd = -1;
	int ifindex;

	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
	 * expose the L2 headers encapsulating the IP packet to BPF and hence
	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
	 * but that requires much more complicated setup.
	 */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
		return;

	src_fd = tun_open("tun_src");
	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
		goto fail;

	close_netns(nstoken);

	/* the rest of the setup runs inside NS_FWD */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
		goto fail;

	target_fd = tun_open("tun_fwd");
	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
		goto fail;

	tunnel_pid = fork();
	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
		goto fail;

	/* child: relay packets between the two TUN fds until it is killed */
	if (tunnel_pid == 0)
		exit(tun_relay_loop(src_fd, target_fd));

	skel = test_tc_peer__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
		goto fail;

	ifindex = get_ifindex("tun_fwd");
	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
		goto fail;

	skel->rodata->IFINDEX_SRC = ifindex;
	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;

	err = test_tc_peer__load(skel);
	if (!ASSERT_OK(err, "test_tc_peer__load"))
		goto fail;

	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
		goto fail;

	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
		goto fail;

	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
		goto fail;

	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
	 * towards dst, and "tc_dst" to redirect packets
	 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
	 */
	SYS("tc qdisc add dev tun_fwd clsact");
	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
	    SRC_PROG_PIN_FILE);

	SYS("tc qdisc add dev veth_dst_fwd clsact");
	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
	    DST_PROG_PIN_FILE);
	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
	    CHK_PROG_PIN_FILE);

	/* Setup route and neigh tables */
	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");

	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");

	/* in NS_SRC, replace the direct veth routes to dst with routes
	 * through the tunnel
	 */
	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
	    " dev tun_src scope global");
	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
	    " dev tun_src scope global");
	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");

	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);

	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
		goto fail;

	test_connectivity();

	/* shared cleanup, also reached on success */
fail:
	if (tunnel_pid > 0) {
		kill(tunnel_pid, SIGTERM);
		waitpid(tunnel_pid, NULL, 0);
	}
	if (src_fd >= 0)
		close(src_fd);
	if (target_fd >= 0)
		close(target_fd);
	if (skel)
		test_tc_peer__destroy(skel);
	if (nstoken)
		close_netns(nstoken);
}
1112
/*
 * Run the subtest function test_<name>() if the subtest <name> is
 * selected: create the namespaces, set up links and routes, call the
 * subtest with the setup result, and delete the namespaces again. The
 * namespaces are deleted even when the link/route setup failed.
 */
#define RUN_TEST(name)                                                                      \
	({                                                                                  \
		struct netns_setup_result setup_result;                                     \
		if (test__start_subtest(#name))                                             \
			if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
				if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
					      "setup links and routes"))                    \
					test_ ## name(&setup_result);                       \
				netns_setup_namespaces("delete");                           \
			}                                                                   \
	})
1124
/*
 * Thread entry point that runs all tc_redirect subtests in sequence.
 * @arg is unused and the return value is always NULL.
 */
static void *test_tc_redirect_run_tests(void *arg)
{
	/* best-effort removal of namespaces that may already exist */
	netns_setup_namespaces_nofail("delete");

	RUN_TEST(tc_redirect_peer);
	RUN_TEST(tc_redirect_peer_l3);
	RUN_TEST(tc_redirect_neigh);
	RUN_TEST(tc_redirect_neigh_fib);
	RUN_TEST(tc_redirect_dtime);
	return NULL;
}
1136
serial_test_tc_redirect(void)1137 void serial_test_tc_redirect(void)
1138 {
1139 pthread_t test_thread;
1140 int err;
1141
1142 /* Run the tests in their own thread to isolate the namespace changes
1143 * so they do not affect the environment of other tests.
1144 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1145 */
1146 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1147 if (ASSERT_OK(err, "pthread_create"))
1148 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1149 }
1150