1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <netinet/tcp.h>
4 #include <unistd.h>
5
6 #include "alloc-util.h"
7 #include "fd-util.h"
8 #include "io-util.h"
9 #include "macro.h"
10 #include "missing_network.h"
11 #include "resolved-dns-stream.h"
12 #include "resolved-manager.h"
13
/* Limit checked against the manager's per-type stream counter before a new stream is allocated */
#define DNS_STREAMS_MAX 128

/* Once this many queries are attached to a single stream we stop polling it for further input */
#define DNS_QUERIES_PER_STREAM 32
17
/* Shuts down all activity on the stream: both event sources are disabled and released, the socket is
 * closed, and the stream is unhooked from its server object (if it is the server's current stream). */
static void dns_stream_stop(DnsStream *s) {
        assert(s);

        /* No more IO or timeout callbacks from here on */
        s->io_event_source = sd_event_source_disable_unref(s->io_event_source);
        s->timeout_event_source = sd_event_source_disable_unref(s->timeout_event_source);

        s->fd = safe_close(s->fd);

        /* With the socket gone we are of no further use to the server object, hence let go of it */
        dns_stream_detach(s);
}
28
/* Recomputes which poll events the stream is interested in and applies them to the IO event source.
 * As a side effect, the next packet from the write queue is promoted to the current write packet
 * if the previous one has been written out completely. Returns the result of
 * sd_event_source_set_io_events(). */
static int dns_stream_update_io(DnsStream *s) {
        uint32_t events = 0;
        bool write_pending, read_pending;

        assert(s);

        write_pending = s->write_packet &&
                s->n_written < sizeof(s->write_size) + s->write_packet->size;

        if (!write_pending && !ordered_set_isempty(s->write_queue)) {
                /* Current packet (if any) is done, move on to the next queued one */
                dns_packet_unref(s->write_packet);
                s->write_packet = ordered_set_steal_first(s->write_queue);
                s->write_size = htobe16(s->write_packet->size);
                s->n_written = 0;
                write_pending = true;
        }

        if (write_pending)
                events |= EPOLLOUT;

        /* We want to read as long as no complete packet is buffered, unless the limit of parallel
         * queries on this connection has been reached already. */
        read_pending = !s->read_packet ||
                s->n_read < sizeof(s->read_size) + s->read_packet->size;

        if (read_pending && set_size(s->queries) < DNS_QUERIES_PER_STREAM)
                events |= EPOLLIN;

        s->requested_events = events;

#if ENABLE_DNS_OVER_TLS
        /* While TLS is handshaking or shutting down it dictates the events to wait for */
        if (s->dnstls_events != 0)
                events = s->dnstls_events;
#endif

        return sd_event_source_set_io_events(s->io_event_source, events);
}
60
dns_stream_complete(DnsStream * s,int error)61 static int dns_stream_complete(DnsStream *s, int error) {
62 _cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */
63
64 assert(s);
65 assert(error >= 0);
66
67 /* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
68 * and received exactly one packet each (in the LLMNR client case). */
69
70 #if ENABLE_DNS_OVER_TLS
71 if (s->encrypted) {
72 int r;
73
74 r = dnstls_stream_shutdown(s, error);
75 if (r != -EAGAIN)
76 dns_stream_stop(s);
77 } else
78 #endif
79 dns_stream_stop(s);
80
81 dns_stream_detach(s);
82
83 if (s->complete)
84 s->complete(s, error);
85 else /* the default action if no completion function is set is to close the stream */
86 dns_stream_unref(s);
87
88 return 0;
89 }
90
/* Determines the local and peer address of the stream's socket, and from the socket's packet options
 * the TTL/hop limit and interface index. Does nothing if the stream was already identified.
 *
 * Returns 0 on success, or a negative errno-style error if one of the socket queries fails or the
 * address family is unsupported. */
static int dns_stream_identify(DnsStream *s) {
        /* Note that CMSG_SPACE() takes a payload length in bytes, not a type */
        CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
                         + CMSG_SPACE(sizeof(int)) /* for the TTL */
                         + EXTRA_CMSG_SPACE /* kernel appears to require extra space */) control;
        struct msghdr mh = {};
        struct cmsghdr *cmsg;
        socklen_t sl;
        int r;

        assert(s);

        if (s->identified)
                return 0;

        /* Query the local side */
        s->local_salen = sizeof(s->local);
        r = getsockname(s->fd, &s->local.sa, &s->local_salen);
        if (r < 0)
                return -errno;
        if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
                s->ifindex = s->local.in6.sin6_scope_id;

        /* Query the remote side */
        s->peer_salen = sizeof(s->peer);
        r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
        if (r < 0)
                return -errno;
        if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
                s->ifindex = s->peer.in6.sin6_scope_id;

        /* Check consistency */
        assert(s->peer.sa.sa_family == s->local.sa.sa_family);
        assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));

        /* Query connection meta information */
        sl = sizeof(control);
        if (s->peer.sa.sa_family == AF_INET) {
                r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
                if (r < 0)
                        return -errno;
        } else if (s->peer.sa.sa_family == AF_INET6) {

                r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
                if (r < 0)
                        return -errno;
        } else
                return -EAFNOSUPPORT;

        /* Wrap the returned option buffer in a msghdr so that it can be walked like received
         * control data. */
        mh.msg_control = &control;
        mh.msg_controllen = sl;

        CMSG_FOREACH(cmsg, &mh) {

                if (cmsg->cmsg_level == IPPROTO_IPV6) {
                        assert(s->peer.sa.sa_family == AF_INET6);

                        switch (cmsg->cmsg_type) {

                        case IPV6_PKTINFO: {
                                struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);

                                if (s->ifindex <= 0)
                                        s->ifindex = i->ipi6_ifindex;
                                break;
                        }

                        case IPV6_HOPLIMIT:
                                s->ttl = *(int *) CMSG_DATA(cmsg);
                                break;
                        }

                } else if (cmsg->cmsg_level == IPPROTO_IP) {
                        assert(s->peer.sa.sa_family == AF_INET);

                        switch (cmsg->cmsg_type) {

                        case IP_PKTINFO: {
                                struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);

                                if (s->ifindex <= 0)
                                        s->ifindex = i->ipi_ifindex;
                                break;
                        }

                        case IP_TTL:
                                s->ttl = *(int *) CMSG_DATA(cmsg);
                                break;
                        }
                }
        }

        /* The Linux kernel sets the interface index to the loopback
         * device if the connection came from the local host since it
         * avoids the routing table in such a case. Let's unset the
         * interface index in such a case. */
        if (s->ifindex == LOOPBACK_IFINDEX)
                s->ifindex = 0;

        /* If we don't know the interface index still, we look for the
         * first local interface with a matching address. Yuck! */
        if (s->ifindex <= 0)
                s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, sockaddr_in_addr(&s->local.sa));

        if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
                /* Make sure all packets for this connection are sent on the same interface */
                r = socket_set_unicast_if(s->fd, s->local.sa.sa_family, s->ifindex);
                if (r < 0)
                        /* Best effort only. The error is carried in r, errno is not reliable here. */
                        log_debug_errno(r, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m");
        }

        s->identified = true;

        return 0;
}
205
/* Writes the specified IO vector to the stream.
 *
 * If the stream is encrypted the data is handed to the TLS layer, unless DNS_STREAM_WRITE_TLS_DATA is
 * set in 'flags', in which case it is written to the socket directly. If a TCP Fast Open address is
 * still pending (tfo_salen > 0), the first write is done with sendmsg(MSG_FASTOPEN), which also
 * establishes the connection.
 *
 * Returns the number of bytes written, or a negative errno-style error. -EAGAIN is returned if the
 * connection is still being established and the write shall be retried later. */
ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
        ssize_t m;

        assert(s);
        assert(iov);

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA))
                return dnstls_stream_writev(s, iov, iovcnt);
#endif

        if (s->tfo_salen > 0) {
                struct msghdr hdr = {
                        .msg_iov = (struct iovec*) iov,
                        .msg_iovlen = iovcnt,
                        .msg_name = &s->tfo_address.sa,
                        .msg_namelen = s->tfo_salen
                };

                m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
                if (m < 0) {
                        if (errno == EOPNOTSUPP) {
                                /* TCP Fast Open is not available, fall back to a regular connect().
                                 * Remember the address length before marking TFO as done, so that
                                 * connect() is not invoked with a zero-length address. */
                                socklen_t salen = s->tfo_salen;

                                s->tfo_salen = 0;

                                /* EINPROGRESS merely means the connection is being established
                                 * asynchronously; treat it like in the sendmsg() case below. */
                                if (connect(s->fd, &s->tfo_address.sa, salen) < 0 && errno != EINPROGRESS)
                                        return -errno;

                                return -EAGAIN;
                        }
                        if (errno == EINPROGRESS)
                                return -EAGAIN;

                        return -errno;
                } else
                        s->tfo_salen = 0; /* connection is made */
        } else {
                m = writev(s->fd, iov, iovcnt);
                if (m < 0)
                        return -errno;
        }

        return m;
}
248
/* Reads up to 'count' bytes from the stream into 'buf', going through the TLS layer if the stream is
 * encrypted and through plain read() otherwise. For the plain case a failure is returned as negative
 * errno; in the TLS case the result of dnstls_stream_read() is passed through unmodified. */
static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
#if ENABLE_DNS_OVER_TLS
        if (s->encrypted)
                return dnstls_stream_read(s, buf, count);
#endif

        ssize_t n = read(s->fd, buf, count);
        if (n < 0)
                return -errno;

        return n;
}
265
/* Timer event callback: the stream passed as userdata is completed with ETIMEDOUT. */
static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
        DnsStream *stream = userdata;

        assert(stream);

        return dns_stream_complete(stream, ETIMEDOUT);
}
273
dns_stream_take_read_packet(DnsStream * s)274 static DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
275 assert(s);
276
277 /* Note, dns_stream_update() should be called after this is called. When this is called, the
278 * stream may be already full and the EPOLLIN flag is dropped from the stream IO event source.
279 * Even this makes a room to read in the stream, this does not call dns_stream_update(), hence
280 * EPOLLIN flag is not set automatically. So, to read further packets from the stream,
281 * dns_stream_update() must be called explicitly. Currently, this is only called from
282 * on_stream_io(), and there dns_stream_update() is called. */
283
284 if (!s->read_packet)
285 return NULL;
286
287 if (s->n_read < sizeof(s->read_size))
288 return NULL;
289
290 if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
291 return NULL;
292
293 s->n_read = 0;
294 return TAKE_PTR(s->read_packet);
295 }
296
/* IO event callback of a stream; 'userdata' is the DnsStream.
 *
 * In order, this: lets the TLS layer process the event (if the stream is encrypted), identifies the
 * connection once it is established, writes out as much of the current write packet as possible,
 * reads the 16-bit size prefix and the payload of incoming packets and dispatches every complete packet
 * to the on_packet() callback, completes LLMNR send streams once their single exchange is done, and
 * finally restarts the timeout if any progress was made.
 *
 * Failures on the stream are generally reported through dns_stream_complete() rather than through
 * the return value. A negative return value is only propagated from the TLS path's
 * dns_stream_update_io() calls and from the on_packet() callback. */
static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
        _cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
        bool progressed = false;
        int r;

        assert(s);

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted) {
                r = dnstls_stream_on_io(s, revents);
                if (r == DNSTLS_STREAM_CLOSED)
                        return 0;
                if (r == -EAGAIN)
                        return dns_stream_update_io(s);
                if (r < 0)
                        return dns_stream_complete(s, -r);

                r = dns_stream_update_io(s);
                if (r < 0)
                        return r;
        }
#endif

        /* only identify after connecting */
        if (s->tfo_salen == 0) {
                r = dns_stream_identify(s);
                if (r < 0)
                        return dns_stream_complete(s, -r);
        }

        if ((revents & EPOLLOUT) &&
            s->write_packet &&
            s->n_written < sizeof(s->write_size) + s->write_packet->size) {

                /* On the wire each packet is prefixed by its size, see the first vector entry */
                struct iovec iov[] = {
                        IOVEC_MAKE(&s->write_size, sizeof(s->write_size)),
                        IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size),
                };

                /* Skip over what was written in earlier invocations already */
                IOVEC_INCREMENT(iov, ELEMENTSOF(iov), s->n_written);

                ssize_t ss = dns_stream_writev(s, iov, ELEMENTSOF(iov), 0);
                if (ss < 0) {
                        if (!ERRNO_IS_TRANSIENT(ss))
                                return dns_stream_complete(s, -ss);
                } else {
                        progressed = true;
                        s->n_written += ss;
                }

                /* Are we done? If so, disable the event source for EPOLLOUT */
                if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
                        r = dns_stream_update_io(s);
                        if (r < 0)
                                return dns_stream_complete(s, -r);
                }
        }

        while ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
               (!s->read_packet ||
                s->n_read < sizeof(s->read_size) + s->read_packet->size)) {

                /* First, read the size prefix */
                if (s->n_read < sizeof(s->read_size)) {
                        ssize_t ss;

                        ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
                        if (ss < 0) {
                                if (!ERRNO_IS_TRANSIENT(ss))
                                        return dns_stream_complete(s, -ss);
                                break;
                        } else if (ss == 0)
                                return dns_stream_complete(s, ECONNRESET);
                        else {
                                progressed = true;
                                s->n_read += ss;
                        }
                }

                /* Then, with the size known, read the packet itself */
                if (s->n_read >= sizeof(s->read_size)) {

                        if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
                                return dns_stream_complete(s, EBADMSG);

                        if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
                                ssize_t ss;

                                if (!s->read_packet) {
                                        r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
                                        if (r < 0)
                                                return dns_stream_complete(s, -r);

                                        /* Fill in the packet's metadata from what we know about the connection */
                                        s->read_packet->size = be16toh(s->read_size);
                                        s->read_packet->ipproto = IPPROTO_TCP;
                                        s->read_packet->family = s->peer.sa.sa_family;
                                        s->read_packet->ttl = s->ttl;
                                        s->read_packet->ifindex = s->ifindex;
                                        s->read_packet->timestamp = now(CLOCK_BOOTTIME);

                                        if (s->read_packet->family == AF_INET) {
                                                s->read_packet->sender.in = s->peer.in.sin_addr;
                                                s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
                                                s->read_packet->destination.in = s->local.in.sin_addr;
                                                s->read_packet->destination_port = be16toh(s->local.in.sin_port);
                                        } else {
                                                assert(s->read_packet->family == AF_INET6);
                                                s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
                                                s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
                                                s->read_packet->destination.in6 = s->local.in6.sin6_addr;
                                                s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);

                                                if (s->read_packet->ifindex == 0)
                                                        s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
                                                if (s->read_packet->ifindex == 0)
                                                        s->read_packet->ifindex = s->local.in6.sin6_scope_id;
                                        }
                                }

                                ss = dns_stream_read(s,
                                          (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
                                          sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
                                if (ss < 0) {
                                        if (!ERRNO_IS_TRANSIENT(ss))
                                                return dns_stream_complete(s, -ss);
                                        break;
                                } else if (ss == 0)
                                        return dns_stream_complete(s, ECONNRESET);
                                else
                                        s->n_read += ss;
                        }

                        /* Are we done? If so, call the packet handler and re-enable EPOLLIN for the
                         * event source if necessary. */
                        _cleanup_(dns_packet_unrefp) DnsPacket *p = dns_stream_take_read_packet(s);
                        if (p) {
                                assert(s->on_packet);
                                r = s->on_packet(s, p);
                                if (r < 0)
                                        return r;

                                r = dns_stream_update_io(s);
                                if (r < 0)
                                        return dns_stream_complete(s, -r);

                                s->packet_received = true;

                                /* If we just disabled the read event, stop reading */
                                if (!FLAGS_SET(s->requested_events, EPOLLIN))
                                        break;
                        }
                }
        }

        /* Complete the stream if finished reading and writing one packet, and there's nothing
         * else left to write. */
        if (s->type == DNS_STREAM_LLMNR_SEND && s->packet_received &&
            !FLAGS_SET(s->requested_events, EPOLLOUT))
                return dns_stream_complete(s, 0);

        /* If we did something, let's restart the timeout event source */
        if (progressed && s->timeout_event_source) {
                r = sd_event_source_set_time_relative(s->timeout_event_source, DNS_STREAM_ESTABLISHED_TIMEOUT_USEC);
                if (r < 0)
                        /* The failure is reported via the return value, not via errno */
                        log_warning_errno(r, "Couldn't restart TCP connection timeout, ignoring: %m");
        }

        return 0;
}
464
/* Destroys the stream: stops it, removes it from the manager's list and per-type counter, releases
 * the TLS state, drops all queued and in-flight packets as well as the server reference, and finally
 * frees the object itself. Returns the result of mfree(). */
static DnsStream *dns_stream_free(DnsStream *s) {
        DnsPacket *queued;

        assert(s);

        dns_stream_stop(s);

        if (s->manager) {
                LIST_REMOVE(streams, s->manager->dns_streams, s);
                s->manager->n_dns_streams[s->type]--;
        }

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted)
                dnstls_stream_free(s);
#endif

        /* Drain the write queue front to back, dropping the reference the queue held on each packet */
        while ((queued = ordered_set_steal_first(s->write_queue)))
                dns_packet_unref(queued);

        dns_packet_unref(s->write_packet);
        dns_packet_unref(s->read_packet);
        dns_server_unref(s->server);

        ordered_set_free(s->write_queue);

        return mfree(s);
}
493
/* Instantiates the reference counting helpers for DnsStream (dns_stream_ref() and dns_stream_unref()
 * are used throughout this file), with dns_stream_free() passed as the destructor. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
495
/* Allocates a new stream object for the socket 'fd' and registers its IO and timeout event sources
 * with the manager's event loop.
 *
 * m                    - the manager the stream is linked into
 * ret                  - where to return the new stream on success
 * type, protocol       - kind of stream and DNS protocol spoken over it
 * fd                   - the socket; stored in the stream only after all setup steps succeeded
 * tfo_address          - optional; if set it is recorded as pending TCP Fast Open destination
 * on_packet            - invoked for every completely received packet
 * complete             - optional; invoked when the stream completes
 * connect_timeout_usec - initial relative timeout for the timer event source
 *
 * Returns 0 on success, -EBUSY if too many streams of this type exist already, -ENOMEM on allocation
 * failure, or the negative error of the step that failed. */
int dns_stream_new(
                Manager *m,
                DnsStream **ret,
                DnsStreamType type,
                DnsProtocol protocol,
                int fd,
                const union sockaddr_union *tfo_address,
                int (on_packet)(DnsStream*, DnsPacket*),
                int (complete)(DnsStream*, int), /* optional */
                usec_t connect_timeout_usec) {

        _cleanup_(dns_stream_unrefp) DnsStream *stream = NULL;
        int r;

        assert(m);
        assert(ret);
        assert(type >= 0);
        assert(type < _DNS_STREAM_TYPE_MAX);
        assert(protocol >= 0);
        assert(protocol < _DNS_PROTOCOL_MAX);
        assert(fd >= 0);
        assert(on_packet);

        if (m->n_dns_streams[type] > DNS_STREAMS_MAX)
                return -EBUSY;

        stream = new(DnsStream, 1);
        if (!stream)
                return -ENOMEM;

        /* The fd field starts out invalid, so that the cleanup handler does not close the caller's
         * socket if one of the steps below fails. */
        *stream = (DnsStream) {
                .n_ref = 1,
                .fd = -1,
                .protocol = protocol,
                .type = type,
        };

        r = ordered_set_ensure_allocated(&stream->write_queue, &dns_packet_hash_ops);
        if (r < 0)
                return r;

        r = sd_event_add_io(m->event, &stream->io_event_source, fd, EPOLLIN, on_stream_io, stream);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(stream->io_event_source, "dns-stream-io");

        r = sd_event_add_time_relative(
                        m->event,
                        &stream->timeout_event_source,
                        CLOCK_BOOTTIME,
                        connect_timeout_usec, 0,
                        on_stream_timeout, stream);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(stream->timeout_event_source, "dns-stream-timeout");

        /* Nothing can fail anymore, hence link the stream into the manager and take over the fd */
        LIST_PREPEND(streams, m->dns_streams, stream);
        m->n_dns_streams[type]++;
        stream->manager = m;

        stream->fd = fd;
        stream->on_packet = on_packet;
        stream->complete = complete;

        if (tfo_address) {
                stream->tfo_address = *tfo_address;

                if (tfo_address->sa.sa_family == AF_INET6)
                        stream->tfo_salen = sizeof(tfo_address->in6);
                else
                        stream->tfo_salen = sizeof(tfo_address->in);
        }

        *ret = TAKE_PTR(stream);

        return 0;
}
571
/* Enqueues packet 'p' for transmission on stream 's'. The queue takes its own reference on the
 * packet. Returns a negative error if the packet could not be added to the queue, otherwise the
 * result of refreshing the stream's IO event mask. */
int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
        assert(s);
        assert(p);

        int r = ordered_set_put(s->write_queue, p);
        if (r < 0)
                return r;

        /* The queue now references the packet, account for that */
        dns_packet_ref(p);

        return dns_stream_update_io(s);
}
586
/* Disconnects the stream from its server object, but only if the stream has a server and is the one
 * that server currently points to. Otherwise this is a no-op. */
void dns_stream_detach(DnsStream *s) {
        assert(s);

        if (s->server && s->server->stream == s)
                dns_server_unref_stream(s->server);
}
598