1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <netinet/tcp.h>
4 #include <unistd.h>
5 
6 #include "alloc-util.h"
7 #include "fd-util.h"
8 #include "io-util.h"
9 #include "macro.h"
10 #include "missing_network.h"
11 #include "resolved-dns-stream.h"
12 #include "resolved-manager.h"
13 
/* Limit on the number of streams per stream type: dns_stream_new() compares the manager's counter for
 * the requested type against this value and refuses with -EBUSY once the counter exceeds it. */
#define DNS_STREAMS_MAX 128

/* Limit on the number of queries tracked on one stream: once set_size(s->queries) reaches this value,
 * dns_stream_update_io() no longer requests EPOLLIN for the stream. */
#define DNS_QUERIES_PER_STREAM 32
17 
/* Tears down the I/O side of a stream: releases both event sources, closes the socket and detaches
 * the stream from its server. The DnsStream object itself stays allocated. */
static void dns_stream_stop(DnsStream *s) {
        assert(s);

        /* Drop the I/O source first, then the timeout source; each slot is overwritten with whatever
         * sd_event_source_disable_unref() hands back. */
        sd_event_source **sources[] = {
                &s->io_event_source,
                &s->timeout_event_source,
        };

        for (size_t i = 0; i < ELEMENTSOF(sources); i++)
                *sources[i] = sd_event_source_disable_unref(*sources[i]);

        s->fd = safe_close(s->fd);

        /* Without a socket we are of no further use to the server object, hence let go of it. */
        dns_stream_detach(s);
}
28 
/* Recomputes which epoll events the stream is interested in and applies them to its I/O event source.
 *
 * EPOLLOUT is requested while a packet is only partially written; if the current packet is done (or
 * there is none) the next one is pulled off the write queue. EPOLLIN is requested while no complete
 * packet has been read, unless DNS_QUERIES_PER_STREAM queries are already tracked on the stream.
 *
 * The computed mask is stored in s->requested_events. Returns the result of
 * sd_event_source_set_io_events(). */
static int dns_stream_update_io(DnsStream *s) {
        uint32_t events = 0;
        bool write_pending, read_incomplete;

        assert(s);

        write_pending = s->write_packet &&
                s->n_written < sizeof(s->write_size) + s->write_packet->size;

        if (!write_pending && !ordered_set_isempty(s->write_queue)) {
                /* Nothing in flight anymore, but more is queued: promote the next queued packet and
                 * start writing it from the beginning (length prefix first). */
                dns_packet_unref(s->write_packet);
                s->write_packet = ordered_set_steal_first(s->write_queue);
                s->write_size = htobe16(s->write_packet->size);
                s->n_written = 0;
                write_pending = true;
        }

        if (write_pending)
                events |= EPOLLOUT;

        /* Only ask for input while the current packet is incomplete and the per-connection limit of
         * parallel queries has not been reached. */
        read_incomplete = !s->read_packet ||
                s->n_read < sizeof(s->read_size) + s->read_packet->size;

        if (read_incomplete && set_size(s->queries) < DNS_QUERIES_PER_STREAM)
                events |= EPOLLIN;

        s->requested_events = events;

#if ENABLE_DNS_OVER_TLS
        /* The TLS layer may need different events for handshaking and clean shutdown; if it asks for
         * any, they take precedence over what we computed (requested_events keeps our own view). */
        if (s->dnstls_events != 0)
                events = s->dnstls_events;
#endif

        return sd_event_source_set_io_events(s->io_event_source, events);
}
60 
dns_stream_complete(DnsStream * s,int error)61 static int dns_stream_complete(DnsStream *s, int error) {
62         _cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */
63 
64         assert(s);
65         assert(error >= 0);
66 
67         /* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
68          * and received exactly one packet each (in the LLMNR client case). */
69 
70 #if ENABLE_DNS_OVER_TLS
71         if (s->encrypted) {
72                 int r;
73 
74                 r = dnstls_stream_shutdown(s, error);
75                 if (r != -EAGAIN)
76                         dns_stream_stop(s);
77         } else
78 #endif
79                 dns_stream_stop(s);
80 
81         dns_stream_detach(s);
82 
83         if (s->complete)
84                 s->complete(s, error);
85         else /* the default action if no completion function is set is to close the stream */
86                 dns_stream_unref(s);
87 
88         return 0;
89 }
90 
/* Collects metadata about the stream's connection: local and peer socket addresses, the TTL/hop limit
 * and the interface index. For LLMNR streams with a known interface the socket is additionally pinned
 * to that interface.
 *
 * Does nothing if the stream was identified before. Returns 0 on success, or a negative errno-style
 * error if one of the socket queries fails. */
static int dns_stream_identify(DnsStream *s) {
        CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
                         + CMSG_SPACE(int) /* for the TTL */
                         + EXTRA_CMSG_SPACE /* kernel appears to require extra space */) control;
        struct msghdr mh = {};
        struct cmsghdr *cmsg;
        socklen_t sl;
        int r;

        assert(s);

        if (s->identified)
                return 0;

        /* Query the local side */
        s->local_salen = sizeof(s->local);
        r = getsockname(s->fd, &s->local.sa, &s->local_salen);
        if (r < 0)
                return -errno;
        if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
                s->ifindex = s->local.in6.sin6_scope_id;

        /* Query the remote side */
        s->peer_salen = sizeof(s->peer);
        r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
        if (r < 0)
                return -errno;
        if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
                s->ifindex = s->peer.in6.sin6_scope_id;

        /* Check consistency */
        assert(s->peer.sa.sa_family == s->local.sa.sa_family);
        assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));

        /* Query connection meta information: the packet options come back as a control message
         * buffer which is walked below. */
        sl = sizeof(control);
        if (s->peer.sa.sa_family == AF_INET) {
                r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
                if (r < 0)
                        return -errno;
        } else if (s->peer.sa.sa_family == AF_INET6) {

                r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
                if (r < 0)
                        return -errno;
        } else
                return -EAFNOSUPPORT;

        mh.msg_control = &control;
        mh.msg_controllen = sl;

        CMSG_FOREACH(cmsg, &mh) {

                if (cmsg->cmsg_level == IPPROTO_IPV6) {
                        assert(s->peer.sa.sa_family == AF_INET6);

                        switch (cmsg->cmsg_type) {

                        case IPV6_PKTINFO: {
                                struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);

                                if (s->ifindex <= 0)
                                        s->ifindex = i->ipi6_ifindex;
                                break;
                        }

                        case IPV6_HOPLIMIT:
                                s->ttl = *(int *) CMSG_DATA(cmsg);
                                break;
                        }

                } else if (cmsg->cmsg_level == IPPROTO_IP) {
                        assert(s->peer.sa.sa_family == AF_INET);

                        switch (cmsg->cmsg_type) {

                        case IP_PKTINFO: {
                                struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);

                                if (s->ifindex <= 0)
                                        s->ifindex = i->ipi_ifindex;
                                break;
                        }

                        case IP_TTL:
                                s->ttl = *(int *) CMSG_DATA(cmsg);
                                break;
                        }
                }
        }

        /* The Linux kernel sets the interface index to the loopback
         * device if the connection came from the local host since it
         * avoids the routing table in such a case. Let's unset the
         * interface index in such a case. */
        if (s->ifindex == LOOPBACK_IFINDEX)
                s->ifindex = 0;

        /* If we don't know the interface index still, we look for the
         * first local interface with a matching address. Yuck! */
        if (s->ifindex <= 0)
                s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, sockaddr_in_addr(&s->local.sa));

        if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
                /* Make sure all packets for this connection are sent on the same interface. Failure
                 * is not fatal; it is reported through the return value, hence log r rather than
                 * whatever errno happens to hold at this point. */
                r = socket_set_unicast_if(s->fd, s->local.sa.sa_family, s->ifindex);
                if (r < 0)
                        log_debug_errno(r, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m");
        }

        s->identified = true;

        return 0;
}
205 
/* Writes the given I/O vector to the stream.
 *
 * Encrypted streams are routed through dnstls_stream_writev(), unless the caller passes
 * DNS_STREAM_WRITE_TLS_DATA in flags, in which case the data goes to the socket directly. While a TCP
 * Fast Open destination is still pending (s->tfo_salen > 0) the data is sent with
 * sendmsg(MSG_FASTOPEN), otherwise with writev().
 *
 * Returns the number of bytes written, or a negative errno-style error. -EAGAIN means nothing was
 * written and the caller should try again later. */
ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
        ssize_t m;

        assert(s);
        assert(iov);

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA))
                return dnstls_stream_writev(s, iov, iovcnt);
#endif

        if (s->tfo_salen > 0) {
                struct msghdr hdr = {
                        .msg_iov = (struct iovec*) iov,
                        .msg_iovlen = iovcnt,
                        .msg_name = &s->tfo_address.sa,
                        .msg_namelen = s->tfo_salen
                };

                m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
                if (m < 0) {
                        if (errno == EOPNOTSUPP) {
                                /* No TCP Fast Open available: fall back to a regular connect().
                                 * Remember the address length before clearing it, so that connect()
                                 * is passed the real length rather than 0. */
                                socklen_t salen = s->tfo_salen;

                                s->tfo_salen = 0;

                                /* EINPROGRESS just means the connection is being set up
                                 * asynchronously; treat it like the sendmsg() case below. */
                                if (connect(s->fd, &s->tfo_address.sa, salen) < 0 && errno != EINPROGRESS)
                                        return -errno;

                                /* Nothing has been sent yet, have the caller retry. */
                                return -EAGAIN;
                        }
                        if (errno == EINPROGRESS)
                                return -EAGAIN;

                        return -errno;
                } else
                        s->tfo_salen = 0; /* connection is made */
        } else {
                m = writev(s->fd, iov, iovcnt);
                if (m < 0)
                        return -errno;
        }

        return m;
}
248 
/* Reads up to count bytes from the stream into buf.
 *
 * Encrypted streams are served by dnstls_stream_read(), whose result is passed through unmodified.
 * Otherwise the socket is read directly and a failure is converted into a negative errno value. */
static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
        ssize_t n;

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted)
                return dnstls_stream_read(s, buf, count);
#endif

        n = read(s->fd, buf, count);
        return n < 0 ? -errno : n;
}
265 
/* Timer callback of a stream (userdata is the DnsStream): completes the stream with ETIMEDOUT and
 * returns whatever dns_stream_complete() returns. */
static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
        DnsStream *stream = userdata;

        assert(stream);

        return dns_stream_complete(stream, ETIMEDOUT);
}
273 
dns_stream_take_read_packet(DnsStream * s)274 static DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
275         assert(s);
276 
277         /* Note, dns_stream_update() should be called after this is called. When this is called, the
278          * stream may be already full and the EPOLLIN flag is dropped from the stream IO event source.
279          * Even this makes a room to read in the stream, this does not call dns_stream_update(), hence
280          * EPOLLIN flag is not set automatically. So, to read further packets from the stream,
281          * dns_stream_update() must be called explicitly. Currently, this is only called from
282          * on_stream_io(), and there dns_stream_update() is called. */
283 
284         if (!s->read_packet)
285                 return NULL;
286 
287         if (s->n_read < sizeof(s->read_size))
288                 return NULL;
289 
290         if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
291                 return NULL;
292 
293         s->n_read = 0;
294         return TAKE_PTR(s->read_packet);
295 }
296 
/* I/O callback of a stream's socket (userdata is the DnsStream).
 *
 * In order: lets the TLS layer process the events (encrypted streams only), identifies the connection
 * once it is established, continues writing the current packet on EPOLLOUT, and reads as many packets
 * as are available on EPOLLIN/EPOLLHUP/EPOLLRDHUP, passing each complete one to s->on_packet().
 *
 * Fatal I/O or protocol errors complete the stream via dns_stream_complete(); transient read/write
 * errors are left for the next wakeup. An error returned by s->on_packet() is returned to the caller
 * as is. Otherwise returns 0 or the result of dns_stream_complete()/dns_stream_update_io(). */
static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
        _cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
        bool progressed = false;
        int r;

        assert(s);

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted) {
                r = dnstls_stream_on_io(s, revents);
                if (r == DNSTLS_STREAM_CLOSED)
                        return 0;
                if (r == -EAGAIN)
                        return dns_stream_update_io(s);
                if (r < 0)
                        return dns_stream_complete(s, -r);

                r = dns_stream_update_io(s);
                if (r < 0)
                        return r;
        }
#endif

        /* only identify after connecting */
        if (s->tfo_salen == 0) {
                r = dns_stream_identify(s);
                if (r < 0)
                        return dns_stream_complete(s, -r);
        }

        if ((revents & EPOLLOUT) &&
            s->write_packet &&
            s->n_written < sizeof(s->write_size) + s->write_packet->size) {

                /* On the wire every packet is preceded by its two-byte length; skip whatever part of
                 * prefix + payload has been written already. */
                struct iovec iov[] = {
                        IOVEC_MAKE(&s->write_size, sizeof(s->write_size)),
                        IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size),
                };

                IOVEC_INCREMENT(iov, ELEMENTSOF(iov), s->n_written);

                ssize_t ss = dns_stream_writev(s, iov, ELEMENTSOF(iov), 0);
                if (ss < 0) {
                        if (!ERRNO_IS_TRANSIENT(ss))
                                return dns_stream_complete(s, -ss);
                } else {
                        progressed = true;
                        s->n_written += ss;
                }

                /* Are we done? If so, disable the event source for EPOLLOUT */
                if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
                        r = dns_stream_update_io(s);
                        if (r < 0)
                                return dns_stream_complete(s, -r);
                }
        }

        while ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
               (!s->read_packet ||
                s->n_read < sizeof(s->read_size) + s->read_packet->size)) {

                /* Step 1: the two-byte length prefix */
                if (s->n_read < sizeof(s->read_size)) {
                        ssize_t ss;

                        ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
                        if (ss < 0) {
                                if (!ERRNO_IS_TRANSIENT(ss))
                                        return dns_stream_complete(s, -ss);
                                break;
                        } else if (ss == 0)
                                return dns_stream_complete(s, ECONNRESET);
                        else {
                                progressed = true;
                                s->n_read += ss;
                        }
                }

                /* Step 2: the packet payload */
                if (s->n_read >= sizeof(s->read_size)) {

                        if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
                                return dns_stream_complete(s, EBADMSG);

                        if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
                                ssize_t ss;

                                if (!s->read_packet) {
                                        /* First payload read for this packet: allocate it and stamp
                                         * it with the connection's metadata. */
                                        r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
                                        if (r < 0)
                                                return dns_stream_complete(s, -r);

                                        s->read_packet->size = be16toh(s->read_size);
                                        s->read_packet->ipproto = IPPROTO_TCP;
                                        s->read_packet->family = s->peer.sa.sa_family;
                                        s->read_packet->ttl = s->ttl;
                                        s->read_packet->ifindex = s->ifindex;
                                        s->read_packet->timestamp = now(CLOCK_BOOTTIME);

                                        if (s->read_packet->family == AF_INET) {
                                                s->read_packet->sender.in = s->peer.in.sin_addr;
                                                s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
                                                s->read_packet->destination.in = s->local.in.sin_addr;
                                                s->read_packet->destination_port = be16toh(s->local.in.sin_port);
                                        } else {
                                                assert(s->read_packet->family == AF_INET6);
                                                s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
                                                s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
                                                s->read_packet->destination.in6 = s->local.in6.sin6_addr;
                                                s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);

                                                if (s->read_packet->ifindex == 0)
                                                        s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
                                                if (s->read_packet->ifindex == 0)
                                                        s->read_packet->ifindex = s->local.in6.sin6_scope_id;
                                        }
                                }

                                /* NOTE(review): unlike the length-prefix read above, a successful
                                 * payload read does not set 'progressed' - confirm this is intended. */
                                ss = dns_stream_read(s,
                                          (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
                                          sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
                                if (ss < 0) {
                                        if (!ERRNO_IS_TRANSIENT(ss))
                                                return dns_stream_complete(s, -ss);
                                        break;
                                } else if (ss == 0)
                                        return dns_stream_complete(s, ECONNRESET);
                                else
                                        s->n_read += ss;
                        }

                        /* Are we done? If so, call the packet handler and re-enable EPOLLIN for the
                         * event source if necessary. */
                        _cleanup_(dns_packet_unrefp) DnsPacket *p = dns_stream_take_read_packet(s);
                        if (p) {
                                assert(s->on_packet);
                                r = s->on_packet(s, p);
                                if (r < 0)
                                        return r;

                                r = dns_stream_update_io(s);
                                if (r < 0)
                                        return dns_stream_complete(s, -r);

                                s->packet_received = true;

                                /* If we just disabled the read event, stop reading */
                                if (!FLAGS_SET(s->requested_events, EPOLLIN))
                                        break;
                        }
                }
        }

        /* Complete the stream if finished reading and writing one packet, and there's nothing
         * else left to write. */
        if (s->type == DNS_STREAM_LLMNR_SEND && s->packet_received &&
            !FLAGS_SET(s->requested_events, EPOLLOUT))
                return dns_stream_complete(s, 0);

        /* If we did something, let's restart the timeout event source. The failure is reported via
         * the return value, so that (and not errno) is what gets logged. */
        if (progressed && s->timeout_event_source) {
                r = sd_event_source_set_time_relative(s->timeout_event_source, DNS_STREAM_ESTABLISHED_TIMEOUT_USEC);
                if (r < 0)
                        log_warning_errno(r, "Couldn't restart TCP connection timeout, ignoring: %m");
        }

        return 0;
}
464 
/* Destroys a stream: stops its I/O, unlinks it from the manager's stream list (adjusting the per-type
 * counter), releases TLS state where applicable and drops all packet and server references it holds.
 * Returns the result of mfree(), so that callers can assign it back to their pointer. */
static DnsStream *dns_stream_free(DnsStream *s) {
        DnsPacket *queued;

        assert(s);

        dns_stream_stop(s);

        /* A stream whose construction failed half-way was never linked into a manager. */
        if (s->manager) {
                LIST_REMOVE(streams, s->manager->dns_streams, s);
                s->manager->n_dns_streams[s->type]--;
        }

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted)
                dnstls_stream_free(s);
#endif

        /* Release every packet still waiting to be written, then the queue itself. */
        ORDERED_SET_FOREACH(queued, s->write_queue)
                dns_packet_unref(ordered_set_remove(s->write_queue, queued));
        ordered_set_free(s->write_queue);

        /* Packets in flight and the server we were talking to. */
        dns_packet_unref(s->read_packet);
        dns_packet_unref(s->write_packet);
        dns_server_unref(s->server);

        return mfree(s);
}

/* Reference counting helpers for DnsStream, built around dns_stream_free(). */
DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
495 
/* Allocates a new stream around the connected (or, with TCP Fast Open, yet to be connected) socket fd
 * and registers its I/O and timeout event sources with the manager's event loop.
 *
 * m                     manager to attach the stream to
 * ret                   where the new stream is stored on success
 * type, protocol        kind of stream and DNS protocol spoken on it
 * fd                    socket; only taken over by the stream on success
 * tfo_address           optional destination for TCP Fast Open; if set, the first write connects
 * on_packet             invoked for every fully received packet
 * complete              optional; invoked when the stream finishes
 * connect_timeout_usec  initial timeout after which the stream is completed with ETIMEDOUT
 *
 * Returns 0 on success, -EBUSY if too many streams of this type exist already, -ENOMEM on allocation
 * failure, or the error of the failing sd-event/ordered-set call. */
int dns_stream_new(
                Manager *m,
                DnsStream **ret,
                DnsStreamType type,
                DnsProtocol protocol,
                int fd,
                const union sockaddr_union *tfo_address,
                int (on_packet)(DnsStream*, DnsPacket*),
                int (complete)(DnsStream*, int), /* optional */
                usec_t connect_timeout_usec) {

        _cleanup_(dns_stream_unrefp) DnsStream *stream = NULL;
        int r;

        assert(m);
        assert(ret);
        assert(type >= 0);
        assert(type < _DNS_STREAM_TYPE_MAX);
        assert(protocol >= 0);
        assert(protocol < _DNS_PROTOCOL_MAX);
        assert(fd >= 0);
        assert(on_packet);

        if (m->n_dns_streams[type] > DNS_STREAMS_MAX)
                return -EBUSY;

        stream = new(DnsStream, 1);
        if (!stream)
                return -ENOMEM;

        /* The fd stays at -1 for now: if anything below fails, the cleanup handler frees the stream,
         * and that must not close a socket the caller still owns. */
        *stream = (DnsStream) {
                .n_ref = 1,
                .fd = -1,
                .protocol = protocol,
                .type = type,
                .on_packet = on_packet,
                .complete = complete,
        };

        r = ordered_set_ensure_allocated(&stream->write_queue, &dns_packet_hash_ops);
        if (r < 0)
                return r;

        r = sd_event_add_io(m->event, &stream->io_event_source, fd, EPOLLIN, on_stream_io, stream);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(stream->io_event_source, "dns-stream-io");

        r = sd_event_add_time_relative(
                        m->event,
                        &stream->timeout_event_source,
                        CLOCK_BOOTTIME,
                        connect_timeout_usec, 0,
                        on_stream_timeout, stream);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(stream->timeout_event_source, "dns-stream-timeout");

        /* Nothing can fail from here on: link the stream into the manager and take over the socket. */
        LIST_PREPEND(streams, m->dns_streams, stream);
        m->n_dns_streams[type]++;
        stream->manager = m;
        stream->fd = fd;

        if (tfo_address) {
                stream->tfo_address = *tfo_address;

                if (tfo_address->sa.sa_family == AF_INET6)
                        stream->tfo_salen = sizeof(tfo_address->in6);
                else
                        stream->tfo_salen = sizeof(tfo_address->in);
        }

        *ret = TAKE_PTR(stream);

        return 0;
}
571 
/* Queues packet p for transmission on stream s and refreshes the stream's I/O event mask so that the
 * write gets scheduled. The queue holds its own reference to the packet; the caller keeps its own.
 *
 * Returns a negative errno-style error if the packet cannot be queued, otherwise the result of
 * dns_stream_update_io(). */
int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
        int r;

        assert(s);
        assert(p);

        r = ordered_set_put(s->write_queue, p);
        if (r < 0)
                return r;

        /* Only take a reference if the packet was actually added. A return value of 0 is taken to
         * mean that this very packet is queued already (the usual set/hashmap put convention); the
         * queue then still holds just the one entry, and a second reference would never be dropped. */
        if (r > 0)
                dns_packet_ref(p);

        return dns_stream_update_io(s);
}
586 
/* Disconnects the stream from its server, but only if the server currently points back at this very
 * stream. Does nothing for streams without a server, or when the server uses a different stream. */
void dns_stream_detach(DnsStream *s) {
        assert(s);

        if (s->server && s->server->stream == s)
                dns_server_unref_stream(s->server);
}
598