1 // SPDX-License-Identifier: GPL-2.0
2
3 #define _GNU_SOURCE
4
5 #include <stddef.h>
6 #include <arpa/inet.h>
7 #include <error.h>
8 #include <errno.h>
9 #include <net/if.h>
10 #include <linux/in.h>
11 #include <linux/netlink.h>
12 #include <linux/rtnetlink.h>
13 #include <netinet/if_ether.h>
14 #include <netinet/ip.h>
15 #include <netinet/ip6.h>
16 #include <netinet/udp.h>
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
23 #include <sys/stat.h>
24 #include <sys/time.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27
/* Local fallbacks, used only when the included headers lack the constant */
#if !defined(ETH_MAX_MTU)
#define ETH_MAX_MTU 0xFFFFU
#endif

#if !defined(UDP_SEGMENT)
#define UDP_SEGMENT 103
#endif

#if !defined(UDP_MAX_SEGMENTS)
#define UDP_MAX_SEGMENTS (1 << 6UL)
#endif

/* MTU value from which the test payload sizes are derived */
#define CONST_MTU_TEST 1500

/* Network header plus UDP header, per address family */
#define CONST_HDRLEN_V4 (sizeof(struct udphdr) + sizeof(struct iphdr))
#define CONST_HDRLEN_V6 (sizeof(struct udphdr) + sizeof(struct ip6_hdr))

/* Payload bytes left in a CONST_MTU_TEST sized packet after the headers */
#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)

/* Number of whole MSS sized segments that fit in ETH_MAX_MTU bytes */
#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
50
/* Address families to run: enabled by the -4 and -6 options */
static bool cfg_do_ipv4 = false;
static bool cfg_do_ipv6 = false;

/* Socket modes to run: -c selects connected, -C selects connectionless */
static bool cfg_do_connected = false;
static bool cfg_do_connectionless = false;

/* -m: send the first byte with MSG_MORE, then the remainder */
static bool cfg_do_msgmore = false;

/* -s: request segmentation with setsockopt() instead of a control message */
static bool cfg_do_setsockopt = false;

/* -t: index of the only testcase to run; -1 runs every testcase */
static int cfg_specific_test_id = -1;

/* Interface whose MTU is changed, and the UDP port used by the sockets */
static const char cfg_ifname[] = "lo";
static unsigned short cfg_port = 9000;

/* Shared payload buffer for both sending and receiving */
static char buf[ETH_MAX_MTU];
63
/*
 * One send/receive scenario: what is handed to the sending socket and what
 * the receiving socket is expected to see afterwards.
 */
struct testcase {
	/* size of the buffer passed to send(); may be larger than one mss */
	int tlen;
	/* true when the send() call is expected to fail */
	bool tfail;
	/* mss after applying gso; left at 0 by the "no GSO" cases */
	int gso_len;
	/* recv(): how many datagrams of exactly one full mss are expected */
	int r_num_mss;
	/* recv(): size of the final, shorter-than-mss datagram, if any */
	int r_len_last;
};
71
/* IPv6 test destination: the loopback address */
const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;

/* IPv4 test destination: two above INADDR_LOOPBACK, in network byte order */
const struct in_addr addr4 = {
	.s_addr = __constant_htonl(INADDR_LOOPBACK + 2),
};
74
75 struct testcase testcases_v4[] = {
76 {
77 /* no GSO: send a single byte */
78 .tlen = 1,
79 .r_len_last = 1,
80 },
81 {
82 /* no GSO: send a single MSS */
83 .tlen = CONST_MSS_V4,
84 .r_num_mss = 1,
85 },
86 {
87 /* no GSO: send a single MSS + 1B: fail */
88 .tlen = CONST_MSS_V4 + 1,
89 .tfail = true,
90 },
91 {
92 /* send a single MSS: will fall back to no GSO */
93 .tlen = CONST_MSS_V4,
94 .gso_len = CONST_MSS_V4,
95 .r_num_mss = 1,
96 },
97 {
98 /* send a single MSS + 1B */
99 .tlen = CONST_MSS_V4 + 1,
100 .gso_len = CONST_MSS_V4,
101 .r_num_mss = 1,
102 .r_len_last = 1,
103 },
104 {
105 /* send exactly 2 MSS */
106 .tlen = CONST_MSS_V4 * 2,
107 .gso_len = CONST_MSS_V4,
108 .r_num_mss = 2,
109 },
110 {
111 /* send 2 MSS + 1B */
112 .tlen = (CONST_MSS_V4 * 2) + 1,
113 .gso_len = CONST_MSS_V4,
114 .r_num_mss = 2,
115 .r_len_last = 1,
116 },
117 {
118 /* send MAX segs */
119 .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
120 .gso_len = CONST_MSS_V4,
121 .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
122 },
123
124 {
125 /* send MAX bytes */
126 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
127 .gso_len = CONST_MSS_V4,
128 .r_num_mss = CONST_MAX_SEGS_V4,
129 .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
130 (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
131 },
132 {
133 /* send MAX + 1: fail */
134 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
135 .gso_len = CONST_MSS_V4,
136 .tfail = true,
137 },
138 {
139 /* send a single 1B MSS: will fall back to no GSO */
140 .tlen = 1,
141 .gso_len = 1,
142 .r_num_mss = 1,
143 },
144 {
145 /* send 2 1B segments */
146 .tlen = 2,
147 .gso_len = 1,
148 .r_num_mss = 2,
149 },
150 {
151 /* send 2B + 2B + 1B segments */
152 .tlen = 5,
153 .gso_len = 2,
154 .r_num_mss = 2,
155 .r_len_last = 1,
156 },
157 {
158 /* send max number of min sized segments */
159 .tlen = UDP_MAX_SEGMENTS,
160 .gso_len = 1,
161 .r_num_mss = UDP_MAX_SEGMENTS,
162 },
163 {
164 /* send max number + 1 of min sized segments: fail */
165 .tlen = UDP_MAX_SEGMENTS + 1,
166 .gso_len = 1,
167 .tfail = true,
168 },
169 {
170 /* EOL */
171 }
172 };
173
174 #ifndef IP6_MAX_MTU
175 #define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr))
176 #endif
177
178 struct testcase testcases_v6[] = {
179 {
180 /* no GSO: send a single byte */
181 .tlen = 1,
182 .r_len_last = 1,
183 },
184 {
185 /* no GSO: send a single MSS */
186 .tlen = CONST_MSS_V6,
187 .r_num_mss = 1,
188 },
189 {
190 /* no GSO: send a single MSS + 1B: fail */
191 .tlen = CONST_MSS_V6 + 1,
192 .tfail = true,
193 },
194 {
195 /* send a single MSS: will fall back to no GSO */
196 .tlen = CONST_MSS_V6,
197 .gso_len = CONST_MSS_V6,
198 .r_num_mss = 1,
199 },
200 {
201 /* send a single MSS + 1B */
202 .tlen = CONST_MSS_V6 + 1,
203 .gso_len = CONST_MSS_V6,
204 .r_num_mss = 1,
205 .r_len_last = 1,
206 },
207 {
208 /* send exactly 2 MSS */
209 .tlen = CONST_MSS_V6 * 2,
210 .gso_len = CONST_MSS_V6,
211 .r_num_mss = 2,
212 },
213 {
214 /* send 2 MSS + 1B */
215 .tlen = (CONST_MSS_V6 * 2) + 1,
216 .gso_len = CONST_MSS_V6,
217 .r_num_mss = 2,
218 .r_len_last = 1,
219 },
220 {
221 /* send MAX segs */
222 .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
223 .gso_len = CONST_MSS_V6,
224 .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
225 },
226
227 {
228 /* send MAX bytes */
229 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
230 .gso_len = CONST_MSS_V6,
231 .r_num_mss = CONST_MAX_SEGS_V6,
232 .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
233 (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
234 },
235 {
236 /* send MAX + 1: fail */
237 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
238 .gso_len = CONST_MSS_V6,
239 .tfail = true,
240 },
241 {
242 /* send a single 1B MSS: will fall back to no GSO */
243 .tlen = 1,
244 .gso_len = 1,
245 .r_num_mss = 1,
246 },
247 {
248 /* send 2 1B segments */
249 .tlen = 2,
250 .gso_len = 1,
251 .r_num_mss = 2,
252 },
253 {
254 /* send 2B + 2B + 1B segments */
255 .tlen = 5,
256 .gso_len = 2,
257 .r_num_mss = 2,
258 .r_len_last = 1,
259 },
260 {
261 /* send max number of min sized segments */
262 .tlen = UDP_MAX_SEGMENTS,
263 .gso_len = 1,
264 .r_num_mss = UDP_MAX_SEGMENTS,
265 },
266 {
267 /* send max number + 1 of min sized segments: fail */
268 .tlen = UDP_MAX_SEGMENTS + 1,
269 .gso_len = 1,
270 .tfail = true,
271 },
272 {
273 /* EOL */
274 }
275 };
276
get_device_mtu(int fd,const char * ifname)277 static unsigned int get_device_mtu(int fd, const char *ifname)
278 {
279 struct ifreq ifr;
280
281 memset(&ifr, 0, sizeof(ifr));
282
283 strcpy(ifr.ifr_name, ifname);
284
285 if (ioctl(fd, SIOCGIFMTU, &ifr))
286 error(1, errno, "ioctl get mtu");
287
288 return ifr.ifr_mtu;
289 }
290
__set_device_mtu(int fd,const char * ifname,unsigned int mtu)291 static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
292 {
293 struct ifreq ifr;
294
295 memset(&ifr, 0, sizeof(ifr));
296
297 ifr.ifr_mtu = mtu;
298 strcpy(ifr.ifr_name, ifname);
299
300 if (ioctl(fd, SIOCSIFMTU, &ifr))
301 error(1, errno, "ioctl set mtu");
302 }
303
set_device_mtu(int fd,int mtu)304 static void set_device_mtu(int fd, int mtu)
305 {
306 int val;
307
308 val = get_device_mtu(fd, cfg_ifname);
309 fprintf(stderr, "device mtu (orig): %u\n", val);
310
311 __set_device_mtu(fd, cfg_ifname, mtu);
312 val = get_device_mtu(fd, cfg_ifname);
313 if (val != mtu)
314 error(1, 0, "unable to set device mtu to %u\n", val);
315
316 fprintf(stderr, "device mtu (test): %u\n", val);
317 }
318
/*
 * Set the path MTU discovery mode of a socket to the "DO" policy.
 *
 * @fd:      socket to configure
 * @is_ipv4: selects the IPv4 option (SOL_IP) or the IPv6 one (SOL_IPV6)
 *
 * Exits through error() when setsockopt() fails.
 */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
	int policy = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;

	if (setsockopt(fd, level, name, &policy, sizeof(policy)) != 0)
		error(1, errno, "setsockopt path mtu");
}
336
/*
 * Query the path MTU known to a socket and log it to stderr.
 *
 * @fd:      socket to query
 * @is_ipv4: selects IP_MTU on SOL_IP or IPV6_MTU on SOL_IPV6
 *
 * Returns the value filled in by getsockopt(). Exits through error() when
 * getsockopt() fails.
 */
static unsigned int get_path_mtu(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int name = is_ipv4 ? IP_MTU : IPV6_MTU;
	unsigned int mtu;
	socklen_t len = sizeof(mtu);

	if (getsockopt(fd, level, name, &mtu, &len) != 0)
		error(1, errno, "getsockopt mtu");

	fprintf(stderr, "path mtu (read): %u\n", mtu);
	return mtu;
}
356
357 /* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
set_route_mtu(int mtu,bool is_ipv4)358 static void set_route_mtu(int mtu, bool is_ipv4)
359 {
360 struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
361 struct nlmsghdr *nh;
362 struct rtattr *rta;
363 struct rtmsg *rt;
364 char data[NLMSG_ALIGN(sizeof(*nh)) +
365 NLMSG_ALIGN(sizeof(*rt)) +
366 NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
367 NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
368 NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
369 int fd, ret, alen, off = 0;
370
371 alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
372
373 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
374 if (fd == -1)
375 error(1, errno, "socket netlink");
376
377 memset(data, 0, sizeof(data));
378
379 nh = (void *)data;
380 nh->nlmsg_type = RTM_NEWROUTE;
381 nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
382 off += NLMSG_ALIGN(sizeof(*nh));
383
384 rt = (void *)(data + off);
385 rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
386 rt->rtm_table = RT_TABLE_MAIN;
387 rt->rtm_dst_len = alen << 3;
388 rt->rtm_protocol = RTPROT_BOOT;
389 rt->rtm_scope = RT_SCOPE_UNIVERSE;
390 rt->rtm_type = RTN_UNICAST;
391 off += NLMSG_ALIGN(sizeof(*rt));
392
393 rta = (void *)(data + off);
394 rta->rta_type = RTA_DST;
395 rta->rta_len = RTA_LENGTH(alen);
396 if (is_ipv4)
397 memcpy(RTA_DATA(rta), &addr4, alen);
398 else
399 memcpy(RTA_DATA(rta), &addr6, alen);
400 off += NLMSG_ALIGN(rta->rta_len);
401
402 rta = (void *)(data + off);
403 rta->rta_type = RTA_OIF;
404 rta->rta_len = RTA_LENGTH(sizeof(int));
405 *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
406 off += NLMSG_ALIGN(rta->rta_len);
407
408 /* MTU is a subtype in a metrics type */
409 rta = (void *)(data + off);
410 rta->rta_type = RTA_METRICS;
411 rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
412 off += NLMSG_ALIGN(rta->rta_len);
413
414 /* now fill MTU subtype. Note that it fits within above rta_len */
415 rta = (void *)(((char *) rta) + RTA_LENGTH(0));
416 rta->rta_type = RTAX_MTU;
417 rta->rta_len = RTA_LENGTH(sizeof(int));
418 *((int *)(RTA_DATA(rta))) = mtu;
419
420 nh->nlmsg_len = off;
421
422 ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
423 if (ret != off)
424 error(1, errno, "send netlink: %uB != %uB\n", ret, off);
425
426 if (close(fd))
427 error(1, errno, "close netlink");
428
429 fprintf(stderr, "route mtu (test): %u\n", mtu);
430 }
431
/*
 * Issue one sendmsg() call and classify the outcome.
 *
 * @fd:    sending socket
 * @msg:   fully prepared message; only the first iovec is length-checked
 * @flags: flags passed through to sendmsg()
 *
 * Returns false when sendmsg() fails with EMSGSIZE, ENOMEM or EINVAL, and
 * true when the whole first iovec was sent. Any other failure, a short send,
 * or a non-zero msg_flags after the call exits through error().
 */
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
	int sent = sendmsg(fd, msg, flags);

	if (sent == -1) {
		switch (errno) {
		case EMSGSIZE:
		case ENOMEM:
		case EINVAL:
			/* rejections the caller may be testing for */
			return false;
		default:
			error(1, errno, "sendmsg");
		}
	}

	if ((size_t)sent != msg->msg_iov->iov_len)
		error(1, 0, "sendto: %d != %llu", sent,
		      (unsigned long long)msg->msg_iov->iov_len);

	if (msg->msg_flags != 0)
		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);

	return true;
}
450
send_one(int fd,int len,int gso_len,struct sockaddr * addr,socklen_t alen)451 static bool send_one(int fd, int len, int gso_len,
452 struct sockaddr *addr, socklen_t alen)
453 {
454 char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
455 struct msghdr msg = {0};
456 struct iovec iov = {0};
457 struct cmsghdr *cm;
458
459 iov.iov_base = buf;
460 iov.iov_len = len;
461
462 msg.msg_iov = &iov;
463 msg.msg_iovlen = 1;
464
465 msg.msg_name = addr;
466 msg.msg_namelen = alen;
467
468 if (gso_len && !cfg_do_setsockopt) {
469 msg.msg_control = control;
470 msg.msg_controllen = sizeof(control);
471
472 cm = CMSG_FIRSTHDR(&msg);
473 cm->cmsg_level = SOL_UDP;
474 cm->cmsg_type = UDP_SEGMENT;
475 cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
476 *((uint16_t *) CMSG_DATA(cm)) = gso_len;
477 }
478
479 /* If MSG_MORE, send 1 byte followed by remainder */
480 if (cfg_do_msgmore && len > 1) {
481 iov.iov_len = 1;
482 if (!__send_one(fd, &msg, MSG_MORE))
483 error(1, 0, "send 1B failed");
484
485 iov.iov_base++;
486 iov.iov_len = len - 1;
487 }
488
489 return __send_one(fd, &msg, 0);
490 }
491
recv_one(int fd,int flags)492 static int recv_one(int fd, int flags)
493 {
494 int ret;
495
496 ret = recv(fd, buf, sizeof(buf), flags);
497 if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
498 return 0;
499 if (ret == -1)
500 error(1, errno, "recv");
501
502 return ret;
503 }
504
run_one(struct testcase * test,int fdt,int fdr,struct sockaddr * addr,socklen_t alen)505 static void run_one(struct testcase *test, int fdt, int fdr,
506 struct sockaddr *addr, socklen_t alen)
507 {
508 int i, ret, val, mss;
509 bool sent;
510
511 fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
512 addr->sa_family == AF_INET ? 4 : 6,
513 test->tlen, test->gso_len,
514 test->tfail ? "(fail)" : "");
515
516 val = test->gso_len;
517 if (cfg_do_setsockopt) {
518 if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
519 error(1, errno, "setsockopt udp segment");
520 }
521
522 sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
523 if (sent && test->tfail)
524 error(1, 0, "send succeeded while expecting failure");
525 if (!sent && !test->tfail)
526 error(1, 0, "send failed while expecting success");
527 if (!sent)
528 return;
529
530 if (test->gso_len)
531 mss = test->gso_len;
532 else
533 mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
534
535
536 /* Recv all full MSS datagrams */
537 for (i = 0; i < test->r_num_mss; i++) {
538 ret = recv_one(fdr, 0);
539 if (ret != mss)
540 error(1, 0, "recv.%d: %d != %d", i, ret, mss);
541 }
542
543 /* Recv the non-full last datagram, if tlen was not a multiple of mss */
544 if (test->r_len_last) {
545 ret = recv_one(fdr, 0);
546 if (ret != test->r_len_last)
547 error(1, 0, "recv.%d: %d != %d (last)",
548 i, ret, test->r_len_last);
549 }
550
551 /* Verify received all data */
552 ret = recv_one(fdr, MSG_DONTWAIT);
553 if (ret)
554 error(1, 0, "recv: unexpected datagram");
555 }
556
run_all(int fdt,int fdr,struct sockaddr * addr,socklen_t alen)557 static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
558 {
559 struct testcase *tests, *test;
560
561 tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
562
563 for (test = tests; test->tlen; test++) {
564 /* if a specific test is given, then skip all others */
565 if (cfg_specific_test_id == -1 ||
566 cfg_specific_test_id == test - tests)
567 run_one(test, fdt, fdr, addr, alen);
568 }
569 }
570
run_test(struct sockaddr * addr,socklen_t alen)571 static void run_test(struct sockaddr *addr, socklen_t alen)
572 {
573 struct timeval tv = { .tv_usec = 100 * 1000 };
574 int fdr, fdt, val;
575
576 fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
577 if (fdr == -1)
578 error(1, errno, "socket r");
579
580 if (bind(fdr, addr, alen))
581 error(1, errno, "bind");
582
583 /* Have tests fail quickly instead of hang */
584 if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
585 error(1, errno, "setsockopt rcv timeout");
586
587 fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
588 if (fdt == -1)
589 error(1, errno, "socket t");
590
591 /* Do not fragment these datagrams: only succeed if GSO works */
592 set_pmtu_discover(fdt, addr->sa_family == AF_INET);
593
594 if (cfg_do_connectionless) {
595 set_device_mtu(fdt, CONST_MTU_TEST);
596 run_all(fdt, fdr, addr, alen);
597 }
598
599 if (cfg_do_connected) {
600 set_device_mtu(fdt, CONST_MTU_TEST + 100);
601 set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
602
603 if (connect(fdt, addr, alen))
604 error(1, errno, "connect");
605
606 val = get_path_mtu(fdt, addr->sa_family == AF_INET);
607 if (val != CONST_MTU_TEST)
608 error(1, 0, "bad path mtu %u\n", val);
609
610 run_all(fdt, fdr, addr, 0 /* use connected addr */);
611 }
612
613 if (close(fdt))
614 error(1, errno, "close t");
615 if (close(fdr))
616 error(1, errno, "close r");
617 }
618
run_test_v4(void)619 static void run_test_v4(void)
620 {
621 struct sockaddr_in addr = {0};
622
623 addr.sin_family = AF_INET;
624 addr.sin_port = htons(cfg_port);
625 addr.sin_addr = addr4;
626
627 run_test((void *)&addr, sizeof(addr));
628 }
629
run_test_v6(void)630 static void run_test_v6(void)
631 {
632 struct sockaddr_in6 addr = {0};
633
634 addr.sin6_family = AF_INET6;
635 addr.sin6_port = htons(cfg_port);
636 addr.sin6_addr = addr6;
637
638 run_test((void *)&addr, sizeof(addr));
639 }
640
parse_opts(int argc,char ** argv)641 static void parse_opts(int argc, char **argv)
642 {
643 int c;
644
645 while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
646 switch (c) {
647 case '4':
648 cfg_do_ipv4 = true;
649 break;
650 case '6':
651 cfg_do_ipv6 = true;
652 break;
653 case 'c':
654 cfg_do_connected = true;
655 break;
656 case 'C':
657 cfg_do_connectionless = true;
658 break;
659 case 'm':
660 cfg_do_msgmore = true;
661 break;
662 case 's':
663 cfg_do_setsockopt = true;
664 break;
665 case 't':
666 cfg_specific_test_id = strtoul(optarg, NULL, 0);
667 break;
668 default:
669 error(1, 0, "%s: parse error", argv[0]);
670 }
671 }
672 }
673
main(int argc,char ** argv)674 int main(int argc, char **argv)
675 {
676 parse_opts(argc, argv);
677
678 if (cfg_do_ipv4)
679 run_test_v4();
680 if (cfg_do_ipv6)
681 run_test_v6();
682
683 fprintf(stderr, "OK\n");
684 return 0;
685 }
686