1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2022 Meta
3
4 #include <stddef.h>
5 #include <stdint.h>
6 #include <stdbool.h>
7 #include <linux/bpf.h>
8 #include <linux/stddef.h>
9 #include <linux/pkt_cls.h>
10 #include <linux/if_ether.h>
11 #include <linux/in.h>
12 #include <linux/ip.h>
13 #include <linux/ipv6.h>
14 #include <linux/tcp.h>
15 #include <linux/udp.h>
16 #include <bpf/bpf_helpers.h>
17 #include <bpf/bpf_endian.h>
18 #include <sys/socket.h>
19
/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
 *           |                                 |
 *  ns_src   |              ns_fwd             |   ns_dst
 *
 * ns_src and ns_dst: ENDHOST namespace
 *            ns_fwd: Forwarding namespace
 */
27
/* Turn an integer context field (skb->data, skb->data_end) into a
 * void pointer. The whole expansion is parenthesized so the result
 * can be used as an operand of any operator (sizeof, ->, [], ...).
 */
#define ctx_ptr(field)		((void *)(long)(field))
29
/* Addresses of the two end hosts. skb_get_type() compares the source
 * address of a packet against them to tell which netns sent it
 * (ip*_src -> SRC_NS, ip*_dst -> DST_NS).
 *
 * The IPv4 values are converted with __bpf_htonl() so they can be
 * compared directly with iph->saddr. The IPv6 values are the 16 address
 * bytes written as a brace initializer for struct in6_addr.
 */
#define ip4_src			__bpf_htonl(0xac100164) /* 172.16.1.100 */
#define ip4_dst			__bpf_htonl(0xac100264) /* 172.16.2.100 */

#define ip6_src			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } /* ::1:dead:beef:cafe */
#define ip6_dst			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } /* ::2:dead:beef:cafe */
37
/* Compare two struct in6_addr values 32 bits at a time.
 *
 * Both arguments are parenthesized so that any expression yielding a
 * struct in6_addr (a member access, a dereference such as *p, a
 * compound literal) can be passed. Each argument is evaluated four
 * times: do not pass expressions with side effects.
 */
#define v6_equal(a, b)		((a).s6_addr32[0] == (b).s6_addr32[0] && \
				 (a).s6_addr32[1] == (b).s6_addr32[1] && \
				 (a).s6_addr32[2] == (b).s6_addr32[2] && \
				 (a).s6_addr32[3] == (b).s6_addr32[3])
42
/* Interface indexes handed to bpf_redirect_neigh() by
 * ingress_fwdns_prio101(): traffic sent by SRC_NS is redirected to
 * IFINDEX_DST, traffic sent by DST_NS to IFINDEX_SRC.
 *
 * Nothing in this file assigns them.
 * NOTE(review): presumably filled in by the userspace loader before the
 * programs are loaded - confirm against the test runner.
 */
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
45
/* Marker values stored in skb->tstamp so that a later program can tell
 * which program stamped the skb last:
 *
 * EGRESS_ENDHOST_MAGIC: written by egress_host(), checked by
 *                       ingress_fwdns_prio100()/ingress_fwdns_prio101()
 * INGRESS_FWDNS_MAGIC:  written by ingress_fwdns_prio101(), checked by
 *                       egress_fwdns_prio100()/egress_fwdns_prio101()
 * EGRESS_FWDNS_MAGIC:   written by egress_fwdns_prio101(), checked by
 *                       ingress_host()
 */
#define EGRESS_ENDHOST_MAGIC	0x0b9fbeef
#define INGRESS_FWDNS_MAGIC	0x1b9fbeef
#define EGRESS_FWDNS_MAGIC	0x2b9fbeef
49
/* Counter slots: the second index of dtimes[][] and errs[][].
 * Each slot is named after the tc program that updates it.
 */
enum {
	INGRESS_FWDNS_P100,	/* ingress_fwdns_prio100() */
	INGRESS_FWDNS_P101,	/* ingress_fwdns_prio101() */
	EGRESS_FWDNS_P100,	/* egress_fwdns_prio100() */
	EGRESS_FWDNS_P101,	/* egress_fwdns_prio101() */
	INGRESS_ENDHOST,	/* ingress_host() */
	EGRESS_ENDHOST,		/* egress_host() */
	SET_DTIME,		/* unexpected bpf_skb_set_tstamp() results */
	__MAX_CNT,		/* number of slots, not a slot itself */
};
60
/* Test case ids; the running one is held in the 'test' global and is
 * the first index of dtimes[][] and errs[][].
 *
 * The order of the entries is significant:
 * - bpf_fwd() is true for every id below TCP_IP4_RT_FWD
 * - skb_get_type() matches the testing traffic on port 50000 + id
 */
enum {
	TCP_IP6_CLEAR_DTIME,	/* the case fwdns_clear_dtime() checks for */
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,		/* first case for which bpf_fwd() is false */
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,		/* row used by inc_dtimes()/inc_errs() when 'test' is out of range */
	__NR_TESTS,		/* number of rows, not a test case */
};
74
/* Sender netns as stored in the second byte of a skb_get_type() result.
 * The values start at 1 because skb_get_type() uses 0 for "sent by
 * neither end host".
 */
enum {
	SRC_NS = 1,
	DST_NS,
};
79
/* Per-test counters, indexed as [test case][counter slot]:
 * dtimes[][] is bumped by inc_dtimes() when a tstamp check passes,
 * errs[][] is bumped by inc_errs() when a check fails.
 */
__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
__u32 errs[__NR_TESTS][__MAX_CNT] = {};
/* Id of the running test case. Nothing in this file writes it.
 * NOTE(review): presumably set from userspace before traffic is sent -
 * confirm against the test runner.
 */
__u32 test = 0;
83
/* Bump the dtimes[][] counter in slot @idx for the running test.
 * When 'test' does not name a row of the table, the UKN_TEST row is
 * used instead.
 */
static void inc_dtimes(__u32 idx)
{
	__u32 row = UKN_TEST;

	if (test < __NR_TESTS)
		row = test;

	dtimes[row][idx]++;
}
91
/* Bump the errs[][] counter in slot @idx for the running test.
 * When 'test' does not name a row of the table, the UKN_TEST row is
 * used instead.
 */
static void inc_errs(__u32 idx)
{
	__u32 row = UKN_TEST;

	if (test < __NR_TESTS)
		row = test;

	errs[row][idx]++;
}
99
/* Return the low byte of @type, which is where skb_get_type() stores
 * the inet protocol.
 */
static int skb_proto(int type)
{
	const int proto_mask = 0xff;

	return type & proto_mask;
}
104
/* Return the second byte of @type, which is where skb_get_type()
 * stores the netns of the sender (SRC_NS or DST_NS).
 */
static int skb_ns(int type)
{
	int upper = type >> 8;

	return upper & 0xff;
}
109
fwdns_clear_dtime(void)110 static bool fwdns_clear_dtime(void)
111 {
112 return test == TCP_IP6_CLEAR_DTIME;
113 }
114
bpf_fwd(void)115 static bool bpf_fwd(void)
116 {
117 return test < TCP_IP4_RT_FWD;
118 }
119
get_proto(void)120 static __u8 get_proto(void)
121 {
122 switch (test) {
123 case UDP_IP4:
124 case UDP_IP6:
125 case UDP_IP4_RT_FWD:
126 case UDP_IP6_RT_FWD:
127 return IPPROTO_UDP;
128 default:
129 return IPPROTO_TCP;
130 }
131 }
132
133 /* -1: parse error: TC_ACT_SHOT
134 * 0: not testing traffic: TC_ACT_OK
135 * >0: first byte is the inet_proto, second byte has the netns
136 * of the sender
137 */
skb_get_type(struct __sk_buff * skb)138 static int skb_get_type(struct __sk_buff *skb)
139 {
140 __u16 dst_ns_port = __bpf_htons(50000 + test);
141 void *data_end = ctx_ptr(skb->data_end);
142 void *data = ctx_ptr(skb->data);
143 __u8 inet_proto = 0, ns = 0;
144 struct ipv6hdr *ip6h;
145 __u16 sport, dport;
146 struct iphdr *iph;
147 struct tcphdr *th;
148 struct udphdr *uh;
149 void *trans;
150
151 switch (skb->protocol) {
152 case __bpf_htons(ETH_P_IP):
153 iph = data + sizeof(struct ethhdr);
154 if (iph + 1 > data_end)
155 return -1;
156 if (iph->saddr == ip4_src)
157 ns = SRC_NS;
158 else if (iph->saddr == ip4_dst)
159 ns = DST_NS;
160 inet_proto = iph->protocol;
161 trans = iph + 1;
162 break;
163 case __bpf_htons(ETH_P_IPV6):
164 ip6h = data + sizeof(struct ethhdr);
165 if (ip6h + 1 > data_end)
166 return -1;
167 if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
168 ns = SRC_NS;
169 else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
170 ns = DST_NS;
171 inet_proto = ip6h->nexthdr;
172 trans = ip6h + 1;
173 break;
174 default:
175 return 0;
176 }
177
178 /* skb is not from src_ns or dst_ns.
179 * skb is not the testing IPPROTO.
180 */
181 if (!ns || inet_proto != get_proto())
182 return 0;
183
184 switch (inet_proto) {
185 case IPPROTO_TCP:
186 th = trans;
187 if (th + 1 > data_end)
188 return -1;
189 sport = th->source;
190 dport = th->dest;
191 break;
192 case IPPROTO_UDP:
193 uh = trans;
194 if (uh + 1 > data_end)
195 return -1;
196 sport = uh->source;
197 dport = uh->dest;
198 break;
199 default:
200 return 0;
201 }
202
203 /* The skb is the testing traffic */
204 if ((ns == SRC_NS && dport == dst_ns_port) ||
205 (ns == DST_NS && sport == dst_ns_port))
206 return (ns << 8 | inet_proto);
207
208 return 0;
209 }
210
211 /* format: direction@iface@netns
212 * egress@veth_(src|dst)@ns_(src|dst)
213 */
214 SEC("tc")
egress_host(struct __sk_buff * skb)215 int egress_host(struct __sk_buff *skb)
216 {
217 int skb_type;
218
219 skb_type = skb_get_type(skb);
220 if (skb_type == -1)
221 return TC_ACT_SHOT;
222 if (!skb_type)
223 return TC_ACT_OK;
224
225 if (skb_proto(skb_type) == IPPROTO_TCP) {
226 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
227 skb->tstamp)
228 inc_dtimes(EGRESS_ENDHOST);
229 else
230 inc_errs(EGRESS_ENDHOST);
231 } else {
232 if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
233 skb->tstamp)
234 inc_dtimes(EGRESS_ENDHOST);
235 else
236 inc_errs(EGRESS_ENDHOST);
237 }
238
239 skb->tstamp = EGRESS_ENDHOST_MAGIC;
240
241 return TC_ACT_OK;
242 }
243
244 /* ingress@veth_(src|dst)@ns_(src|dst) */
245 SEC("tc")
ingress_host(struct __sk_buff * skb)246 int ingress_host(struct __sk_buff *skb)
247 {
248 int skb_type;
249
250 skb_type = skb_get_type(skb);
251 if (skb_type == -1)
252 return TC_ACT_SHOT;
253 if (!skb_type)
254 return TC_ACT_OK;
255
256 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
257 skb->tstamp == EGRESS_FWDNS_MAGIC)
258 inc_dtimes(INGRESS_ENDHOST);
259 else
260 inc_errs(INGRESS_ENDHOST);
261
262 return TC_ACT_OK;
263 }
264
265 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
266 SEC("tc")
ingress_fwdns_prio100(struct __sk_buff * skb)267 int ingress_fwdns_prio100(struct __sk_buff *skb)
268 {
269 int skb_type;
270
271 skb_type = skb_get_type(skb);
272 if (skb_type == -1)
273 return TC_ACT_SHOT;
274 if (!skb_type)
275 return TC_ACT_OK;
276
277 /* delivery_time is only available to the ingress
278 * if the tc-bpf checks the skb->tstamp_type.
279 */
280 if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
281 inc_errs(INGRESS_FWDNS_P100);
282
283 if (fwdns_clear_dtime())
284 skb->tstamp = 0;
285
286 return TC_ACT_UNSPEC;
287 }
288
289 /* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
290 SEC("tc")
egress_fwdns_prio100(struct __sk_buff * skb)291 int egress_fwdns_prio100(struct __sk_buff *skb)
292 {
293 int skb_type;
294
295 skb_type = skb_get_type(skb);
296 if (skb_type == -1)
297 return TC_ACT_SHOT;
298 if (!skb_type)
299 return TC_ACT_OK;
300
301 /* delivery_time is always available to egress even
302 * the tc-bpf did not use the tstamp_type.
303 */
304 if (skb->tstamp == INGRESS_FWDNS_MAGIC)
305 inc_dtimes(EGRESS_FWDNS_P100);
306 else
307 inc_errs(EGRESS_FWDNS_P100);
308
309 if (fwdns_clear_dtime())
310 skb->tstamp = 0;
311
312 return TC_ACT_UNSPEC;
313 }
314
315 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
316 SEC("tc")
ingress_fwdns_prio101(struct __sk_buff * skb)317 int ingress_fwdns_prio101(struct __sk_buff *skb)
318 {
319 __u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
320 int skb_type;
321
322 skb_type = skb_get_type(skb);
323 if (skb_type == -1 || !skb_type)
324 /* Should have handled in prio100 */
325 return TC_ACT_SHOT;
326
327 if (skb_proto(skb_type) == IPPROTO_UDP)
328 expected_dtime = 0;
329
330 if (skb->tstamp_type) {
331 if (fwdns_clear_dtime() ||
332 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
333 skb->tstamp != expected_dtime)
334 inc_errs(INGRESS_FWDNS_P101);
335 else
336 inc_dtimes(INGRESS_FWDNS_P101);
337 } else {
338 if (!fwdns_clear_dtime() && expected_dtime)
339 inc_errs(INGRESS_FWDNS_P101);
340 }
341
342 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
343 skb->tstamp = INGRESS_FWDNS_MAGIC;
344 } else {
345 if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
346 BPF_SKB_TSTAMP_DELIVERY_MONO))
347 inc_errs(SET_DTIME);
348 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
349 BPF_SKB_TSTAMP_UNSPEC))
350 inc_errs(SET_DTIME);
351 }
352
353 if (skb_ns(skb_type) == SRC_NS)
354 return bpf_fwd() ?
355 bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
356 else
357 return bpf_fwd() ?
358 bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
359 }
360
361 /* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
362 SEC("tc")
egress_fwdns_prio101(struct __sk_buff * skb)363 int egress_fwdns_prio101(struct __sk_buff *skb)
364 {
365 int skb_type;
366
367 skb_type = skb_get_type(skb);
368 if (skb_type == -1 || !skb_type)
369 /* Should have handled in prio100 */
370 return TC_ACT_SHOT;
371
372 if (skb->tstamp_type) {
373 if (fwdns_clear_dtime() ||
374 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
375 skb->tstamp != INGRESS_FWDNS_MAGIC)
376 inc_errs(EGRESS_FWDNS_P101);
377 else
378 inc_dtimes(EGRESS_FWDNS_P101);
379 } else {
380 if (!fwdns_clear_dtime())
381 inc_errs(EGRESS_FWDNS_P101);
382 }
383
384 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
385 skb->tstamp = EGRESS_FWDNS_MAGIC;
386 } else {
387 if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
388 BPF_SKB_TSTAMP_DELIVERY_MONO))
389 inc_errs(SET_DTIME);
390 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
391 BPF_SKB_TSTAMP_UNSPEC))
392 inc_errs(SET_DTIME);
393 }
394
395 return TC_ACT_OK;
396 }
397
/* License string of the BPF object, placed in the "license" section */
char __license[] SEC("license") = "GPL";
399