1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <net/if_arp.h>
4 #include <netinet/tcp.h>
5 
6 #include "errno-util.h"
7 #include "fd-util.h"
8 #include "missing_network.h"
9 #include "missing_socket.h"
10 #include "resolved-dns-stub.h"
11 #include "socket-netlink.h"
12 #include "socket-util.h"
13 #include "stdio-util.h"
14 #include "string-table.h"
15 
/* Maximum datagram size advertised by the main stub: the loopback device on Linux has an MTU of 64K,
 * from which the Ethernet (14), IP (20) and UDP (8) header sizes are deducted. */
#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U - 14U - 20U - 8U)

/* The extra stubs advertise a more conservative limit. */
#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
22 
23 static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
24 static int manager_dns_stub_fd(Manager *m, int family, const union in_addr_union *listen_address, int type);
25 
dns_stub_listener_extra_hash_func(const DnsStubListenerExtra * a,struct siphash * state)26 static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
27         assert(a);
28 
29         siphash24_compress(&a->mode, sizeof(a->mode), state);
30         siphash24_compress(&a->family, sizeof(a->family), state);
31         siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
32         siphash24_compress(&a->port, sizeof(a->port), state);
33 }
34 
dns_stub_listener_extra_compare_func(const DnsStubListenerExtra * a,const DnsStubListenerExtra * b)35 static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
36         int r;
37 
38         assert(a);
39         assert(b);
40 
41         r = CMP(a->mode, b->mode);
42         if (r != 0)
43                 return r;
44 
45         r = CMP(a->family, b->family);
46         if (r != 0)
47                 return r;
48 
49         r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
50         if (r != 0)
51                 return r;
52 
53         return CMP(a->port, b->port);
54 }
55 
/* Hash ops for containers keyed by DnsStubListenerExtra objects, wiring up the hash and compare
 * functions above and registering dns_stub_listener_extra_free() as key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(dns_stub_listener_extra_hash_ops,
                                    DnsStubListenerExtra,
                                    dns_stub_listener_extra_hash_func,
                                    dns_stub_listener_extra_compare_func,
                                    dns_stub_listener_extra_free);
62 
/* Allocates a new extra stub listener object that points back to manager 'm'; all other fields are
 * zero-initialized. On success stores the object in *ret and returns 0, on allocation failure returns
 * -ENOMEM and leaves *ret untouched. */
int dns_stub_listener_extra_new(
                Manager *m,
                DnsStubListenerExtra **ret) {

        DnsStubListenerExtra *listener = new(DnsStubListenerExtra, 1);
        if (!listener)
                return -ENOMEM;

        *listener = (DnsStubListenerExtra) { .manager = m };

        *ret = listener;
        return 0;
}
80 
/* Releases an extra stub listener: disables and drops both event sources, frees the per-listener
 * query hashmap and finally the object itself. Accepts NULL. Always returns NULL. */
DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
        if (p) {
                p->udp_event_source = sd_event_source_disable_unref(p->udp_event_source);
                p->tcp_event_source = sd_event_source_disable_unref(p->tcp_event_source);

                hashmap_free(p->queries_by_packet);
        }

        return mfree(p);
}
92 
stub_packet_hash_func(const DnsPacket * p,struct siphash * state)93 static void stub_packet_hash_func(const DnsPacket *p, struct siphash *state) {
94         assert(p);
95 
96         siphash24_compress(&p->protocol, sizeof(p->protocol), state);
97         siphash24_compress(&p->family, sizeof(p->family), state);
98         siphash24_compress(&p->sender, sizeof(p->sender), state);
99         siphash24_compress(&p->ipproto, sizeof(p->ipproto), state);
100         siphash24_compress(&p->sender_port, sizeof(p->sender_port), state);
101         siphash24_compress(DNS_PACKET_HEADER(p), sizeof(DnsPacketHeader), state);
102 
103         /* We don't bother hashing the full packet here, just the header */
104 }
105 
stub_packet_compare_func(const DnsPacket * x,const DnsPacket * y)106 static int stub_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
107         int r;
108 
109         r = CMP(x->protocol, y->protocol);
110         if (r != 0)
111                 return r;
112 
113         r = CMP(x->family, y->family);
114         if (r != 0)
115                 return r;
116 
117         r = memcmp(&x->sender, &y->sender, sizeof(x->sender));
118         if (r != 0)
119                 return r;
120 
121         r = CMP(x->ipproto, y->ipproto);
122         if (r != 0)
123                 return r;
124 
125         r = CMP(x->sender_port, y->sender_port);
126         if (r != 0)
127                 return r;
128 
129         return memcmp(DNS_PACKET_HEADER(x), DNS_PACKET_HEADER(y), sizeof(DnsPacketHeader));
130 }
131 
/* Hash ops for containers keyed by DnsPacket objects, built from the two functions above. */
DEFINE_HASH_OPS(
                stub_packet_hash_ops,
                DnsPacket,
                stub_packet_hash_func,
                stub_packet_compare_func);
133 
/* Adds 'rr' to '*reply' (with 'rrsig' attached as its signature). If 'with_rrsig' is set and an RRSIG
 * is available, the RRSIG RR itself is added as a separate item too, using the same ifindex and flags.
 * Returns 0 on success, or the negative error of the first failing dns_answer_add_extend() call. */
static int reply_add_with_rrsig(
                DnsAnswer **reply,
                DnsResourceRecord *rr,
                int ifindex,
                DnsAnswerFlags flags,
                DnsResourceRecord *rrsig,
                bool with_rrsig) {
        int r;

        assert(reply);
        assert(rr);

        r = dns_answer_add_extend(reply, rr, ifindex, flags, rrsig);
        if (r < 0)
                return r;

        /* Nothing more to do unless the caller wants signatures and we actually have one */
        if (!with_rrsig || !rrsig)
                return 0;

        r = dns_answer_add_extend(reply, rrsig, ifindex, flags, NULL);
        return r < 0 ? r : 0;
}
158 
/* Copies every RR from 'answer' into '*reply' that matches 'question', either directly or as a
 * CNAME/DNAME for it. Section flags are stripped from the copied items. Returns 0 on success, a
 * negative error otherwise. */
static int dns_stub_collect_answer_by_question(
                DnsAnswer **reply,
                DnsAnswer *answer,
                DnsQuestion *question,
                bool with_rrsig) { /* Add RRSIG RR matching each RR */

        DnsAnswerItem *item;
        int r;

        assert(reply);

        DNS_ANSWER_FOREACH_ITEM(item, answer) {

                /* Does this RR answer the question directly? */
                r = dns_question_matches_rr(question, item->rr, NULL);
                if (r < 0)
                        return r;
                if (r == 0) {
                        /* If not, a CNAME/DNAME for the question counts as an answer as well */
                        r = dns_question_matches_cname_or_dname(question, item->rr, NULL);
                        if (r < 0)
                                return r;
                }
                if (r == 0)
                        continue;

                /* Primary answers always go without section info, so that they land in the answer
                 * section when the reply is synthesized; hence mask the section bits. */
                r = reply_add_with_rrsig(
                                reply,
                                item->rr,
                                item->ifindex,
                                item->flags & ~DNS_ANSWER_MASK_SECTIONS,
                                item->rrsig,
                                with_rrsig);
                if (r < 0)
                        return r;
        }

        return 0;
}
203 
/* Copies every RR from 'answer' into '*reply' whose section flags equal 'section', skipping RRs that
 * are already contained in 'exclude1' or 'exclude2'. Unless 'with_dnssec' is set, RRs of DNSSEC types
 * are skipped too, and no separate RRSIG items are added. Returns 0 on success, a negative error
 * otherwise. */
static int dns_stub_collect_answer_by_section(
                DnsAnswer **reply,
                DnsAnswer *answer,
                DnsAnswerFlags section,
                DnsAnswer *exclude1,
                DnsAnswer *exclude2,
                bool with_dnssec) { /* Include DNSSEC RRs. RRSIG, NSEC, … */

        DnsAnswerItem *item;
        int r;

        assert(reply);

        DNS_ANSWER_FOREACH_ITEM(item, answer) {

                /* Already placed in one of the other sections? */
                if (dns_answer_contains(exclude1, item->rr) ||
                    dns_answer_contains(exclude2, item->rr))
                        continue;

                /* DNSSEC RRs are only passed on if requested */
                if (!with_dnssec &&
                    dns_type_is_dnssec(item->rr->key->type))
                        continue;

                /* Section bits must be exactly the ones asked for */
                if ((item->flags & DNS_ANSWER_MASK_SECTIONS) != (section & DNS_ANSWER_MASK_SECTIONS))
                        continue;

                r = reply_add_with_rrsig(
                                reply,
                                item->rr,
                                item->ifindex,
                                item->flags,
                                item->rrsig,
                                with_dnssec);
                if (r < 0)
                        return r;
        }

        return 0;
}
246 
/* Distributes the RRs collected in q->answer over the three reply sections of 'q' (reply_answer,
 * reply_authoritative, reply_additional). Returns 0 on success, a negative error otherwise. */
static int dns_stub_assign_sections(
                DnsQuery *q,
                DnsQuestion *question,
                bool edns0_do) {

        /* Origins whose remaining RRs are placed in our ADDITIONAL section, in processing order. The
         * final 0 selects RRs that carry no section marking at all. */
        static const DnsAnswerFlags additional_sources[] = {
                DNS_ANSWER_SECTION_ANSWER,
                DNS_ANSWER_SECTION_ADDITIONAL,
                0,
        };
        int r;

        assert(q);
        assert(question);

        /* The section assignment we produce tries to resemble what the upstream DNS server sent us. The
         * upstream assignment is stored in the DNS_ANSWER_SECTION_xyz flags of the DnsAnswer items. Not
         * every RR carries such flags though (for example locally synthesized ones, which never came
         * from a DNS packet), hence the logic is extended as follows:
         *
         *   1. Whatever directly matches the original question goes to ANSWER, regardless of whether
         *      it carries section info or what that info says.
         *   2. Whatever stems from the AUTHORITY section and is not in ANSWER already goes to AUTHORITY.
         *   3. Whatever stems from the ANSWER or ADDITIONAL sections, or has no section marking, and
         *      is not in the other two sections already goes to ADDITIONAL. */

        /* Step 1: direct answers to the question */
        r = dns_stub_collect_answer_by_question(
                        &q->reply_answer,
                        q->answer,
                        question,
                        edns0_do);
        if (r < 0)
                return r;

        /* Step 2: RRs from the upstream authority section, unless already listed as answer */
        r = dns_stub_collect_answer_by_section(
                        &q->reply_authoritative,
                        q->answer,
                        DNS_ANSWER_SECTION_AUTHORITY,
                        q->reply_answer, NULL,
                        edns0_do);
        if (r < 0)
                return r;

        /* Step 3: everything else, unless already listed in one of the other two sections */
        for (size_t i = 0; i < sizeof(additional_sources) / sizeof(additional_sources[0]); i++) {
                r = dns_stub_collect_answer_by_section(
                                &q->reply_additional,
                                q->answer,
                                additional_sources[i],
                                q->reply_answer, q->reply_authoritative,
                                edns0_do);
                if (r < 0)
                        return r;
        }

        return 0;
}
318 
/* Allocates a new DNS packet of at most 'max_size' bytes and appends question 'q' to it. If the
 * question does not fit, this is reported via *ret_truncated when that pointer is given, and as
 * -EMSGSIZE otherwise. On success the packet is returned in *ret and 0 is returned. */
static int dns_stub_make_reply_packet(
                DnsPacket **ret,
                size_t max_size,
                DnsQuestion *q,
                bool *ret_truncated) {

        _cleanup_(dns_packet_unrefp) DnsPacket *packet = NULL;
        bool question_truncated;
        int r;

        assert(ret);

        r = dns_packet_new(&packet, DNS_PROTOCOL_DNS, 0, max_size);
        if (r < 0)
                return r;

        r = dns_packet_append_question(packet, q);
        if (r < 0 && r != -EMSGSIZE)
                return r;
        question_truncated = r == -EMSGSIZE;

        if (!ret_truncated) {
                /* The caller cannot be told about truncation, hence treat it as failure */
                if (question_truncated)
                        return -EMSGSIZE;
        } else
                *ret_truncated = question_truncated;

        DNS_PACKET_HEADER(packet)->qdcount = htobe16(dns_question_size(q));

        *ret = TAKE_PTR(packet);
        return 0;
}
351 
/* Appends the answer, authoritative and additional sections to packet 'p' and writes the three RR
 * counts into the packet header. Truncation is signalled if the answer section does not fit, or if the
 * authoritative section does not fit while 'edns0_do' is set. When truncation is signalled,
 * *truncated is set to true, or -EMSGSIZE is returned if 'truncated' is NULL. *truncated is never
 * reset to false here. */
static int dns_stub_add_reply_packet_body(
                DnsPacket *p,
                DnsAnswer *answer,
                DnsAnswer *authoritative,
                DnsAnswer *additional,
                bool edns0_do, /* Client expects DNSSEC RRs? */
                bool *truncated) {

        unsigned n_answer = 0, n_authoritative = 0, n_additional = 0;
        bool tc = false;
        int r;

        assert(p);

        /* A section is only attempted if the previous one fit entirely. Running out of space in any
         * other situation than the two described above is not signalled as truncation, since the RRs
         * dropped in those cases should not be essential. */

        r = dns_packet_append_answer(p, answer, &n_answer);
        if (r < 0 && r != -EMSGSIZE)
                return r;
        if (r == -EMSGSIZE)
                tc = true;
        else {
                r = dns_packet_append_answer(p, authoritative, &n_authoritative);
                if (r < 0 && r != -EMSGSIZE)
                        return r;
                if (r == -EMSGSIZE) {
                        if (edns0_do)
                                tc = true;
                } else {
                        r = dns_packet_append_answer(p, additional, &n_additional);
                        if (r < 0 && r != -EMSGSIZE)
                                return r;
                }
        }

        if (tc && !truncated)
                return -EMSGSIZE;
        if (tc)
                *truncated = true;

        DNS_PACKET_HEADER(p)->ancount = htobe16(n_answer);
        DNS_PACKET_HEADER(p)->nscount = htobe16(n_authoritative);
        DNS_PACKET_HEADER(p)->arcount = htobe16(n_additional);
        return 0;
}
402 
nsid_string(void)403 static const char *nsid_string(void) {
404         static char buffer[SD_ID128_STRING_MAX + STRLEN(".resolved.systemd.io")] = "";
405         sd_id128_t id;
406         int r;
407 
408         /* Let's generate a string that we can use as RFC5001 NSID identifier. The string shall identify us
409          * as systemd-resolved, and return a different string for each resolved instance without leaking host
410          * identity. Hence let's use a fixed suffix that identifies resolved, and a prefix generated from the
411          * machine ID but from which the machine ID cannot be determined.
412          *
413          * Clients can use this to determine whether an answer is originating locally or is proxied from
414          * upstream. */
415 
416         if (!isempty(buffer))
417                 return buffer;
418 
419         r = sd_id128_get_machine_app_specific(
420                         SD_ID128_MAKE(ed,d3,12,5d,16,b9,41,f9,a1,49,5f,ab,15,62,ab,27),
421                         &id);
422         if (r < 0) {
423                 log_debug_errno(r, "Failed to determine machine ID, ignoring: %m");
424                 return NULL;
425         }
426 
427         xsprintf(buffer, SD_ID128_FORMAT_STR ".resolved.systemd.io", SD_ID128_FORMAT_VAL(id));
428         return buffer;
429 }
430 
/* Finalizes reply packet 'p': optionally appends the OPT RR, then fills in the ID and the flags field
 * of the header. Returns 0 on success, a negative error otherwise. If the OPT RR does not fit into the
 * packet the TC bit is set instead of failing. */
static int dns_stub_finish_reply_packet(
                DnsPacket *p,
                uint16_t id,
                int rcode,
                bool tc,        /* set the Truncated bit? */
                bool aa,        /* set the Authoritative Answer bit? */
                bool rd,        /* set the Recursion Desired bit? */
                bool add_opt,   /* add an OPT RR to this packet? */
                bool edns0_do,  /* set the EDNS0 DNSSEC OK bit? */
                bool ad,        /* set the DNSSEC authenticated data bit? */
                bool cd,        /* set the DNSSEC checking disabled bit? */
                uint16_t max_udp_size, /* The maximum UDP datagram size to advertise to clients */
                bool nsid) {    /* whether to add NSID */

        int r;

        assert(p);

        if (!add_opt) {
                /* Without EDNS0 there is no DO bit either */
                edns0_do = false;

                /* … and the rcode has to fit into 4 bits */
                if (rcode > 0xF)
                        rcode = DNS_RCODE_SERVFAIL;
        } else {
                r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, nsid ? nsid_string() : NULL, rcode, NULL);
                if (r == -EMSGSIZE) /* No space left for the OPT RR? Then indicate truncation */
                        tc = true;
                else if (r < 0)
                        return r;
        }

        /* The CD bit is only set if DO is on as well. The AD bit on the other hand may be set even if
         * the client did not signal DO, as per RFC 6840, section 5.7. */
        cd = cd && edns0_do;

        DNS_PACKET_HEADER(p)->id = id;

        DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
                                                              1  /* qr */,
                                                              0  /* opcode */,
                                                              aa /* aa */,
                                                              tc /* tc */,
                                                              rd /* rd */,
                                                              1  /* ra */,
                                                              ad /* ad */,
                                                              cd /* cd */,
                                                              rcode));

        return 0;
}
486 
address_is_proxy(int family,const union in_addr_union * a)487 static bool address_is_proxy(int family, const union in_addr_union *a) {
488         assert(a);
489 
490         /* Returns true if the specified address is the DNS "proxy" stub, i.e. where we unconditionally enable bypass mode */
491 
492         if (family != AF_INET)
493                 return false;
494 
495         return be32toh(a->in.s_addr) == INADDR_DNS_PROXY_STUB;
496 }
497 
/* Picks the socket to use for sending: if an extra listener is given its socket of the requested type
 * is used, otherwise the socket is looked up by address family and listen address. Returns whatever the
 * respective lookup helper returns. */
static int find_socket_fd(
                Manager *m,
                DnsStubListenerExtra *l,
                int family,
                const union in_addr_union *listen_address,
                int type) {

        assert(m);

        return l ? manager_dns_stub_fd_extra(m, l, type)
                 : manager_dns_stub_fd(m, family, listen_address, type);
}
514 
/* Sends 'reply' as response to request packet 'p': via stream 's' if there is one, otherwise as
 * datagram on the matching stub socket. Send failures are logged at debug level and returned. */
static int dns_stub_send(
                Manager *m,
                DnsStubListenerExtra *l,
                DnsStream *s,
                DnsPacket *p,
                DnsPacket *reply) {

        int r;

        assert(m);
        assert(p);
        assert(reply);

        if (s)
                r = dns_stream_write_packet(s, reply);
        else {
                int fd, ifindex;

                fd = find_socket_fd(m, l, p->family, &p->sender, SOCK_DGRAM);
                if (fd < 0)
                        return fd;

                /* Extra listeners and the proxy stub address reply on the interface the request came in
                 * on, everything else is forced onto the loopback interface. */
                if (l || address_is_proxy(p->family, &p->destination))
                        ifindex = p->ifindex;
                else
                        ifindex = LOOPBACK_IFINDEX;

                /* It is essential that the source IP address of this packet is chosen explicitly:
                 * otherwise the kernel picks it automatically based on the routing table, and thus ends
                 * up with 127.0.0.1 rather than 127.0.0.53. */
                r = manager_send(m,
                                 fd,
                                 ifindex,
                                 p->family, &p->sender, p->sender_port, &p->destination,
                                 reply);
        }
        if (r < 0)
                return log_debug_errno(r, "Failed to send reply packet: %m");

        return 0;
}
551 
/* Decides whether the reply to 'q' shall carry the EDNS0 DO bit. Returns 1 if so, 0 otherwise. */
static int dns_stub_reply_with_edns0_do(DnsQuery *q) {
        assert(q);

        /* Never reply with DO set if the client did not ask for it */
        if (!DNS_PACKET_DO(q->request_packet))
                return false;

        /* Otherwise, do so only if one of the following holds: */
        return q->answer_dnssec_result >= 0 ||          /* we did proper DNSSEC validation … */
                dns_query_fully_authenticated(q) ||     /* … or we considered it authentic otherwise … */
                DNS_PACKET_CD(q->request_packet);       /* … or client set CD */
}
564 
/* Drops RRs from the "lower priority" reply sections of 'q' whose keys also show up in a "higher
 * priority" section. */
static void dns_stub_suppress_duplicate_section_rrs(DnsQuery *q) {
        /* While following a CNAME/DNAME chain the sections are built up from multiple reply packets (one
         * per chain element), and thus may end up containing redundant RRs. For example, an RR from the
         * additional section of the first reply may turn out to be the main answer of a later reply in
         * the chain.
         *
         * Matching is done by RR key rather than by full RR, because an RRset should always end up in
         * one section as a whole or not at all, but never be split among sections. */

        /* ANSWER section keys are removed from AUTHORITATIVE … */
        dns_answer_remove_by_answer_keys(&q->reply_authoritative, q->reply_answer);

        /* … and from ADDITIONAL … */
        dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_answer);

        /* … and AUTHORITATIVE section keys are removed from ADDITIONAL. */
        dns_answer_remove_by_answer_keys(&q->reply_additional, q->reply_authoritative);
}
582 
/* Builds the reply packet for query 'q' from the three reply sections accumulated in it, using 'rcode'
 * as response code, and sends it back to the client the request came from. Returns 0 on success, a
 * negative error otherwise; failures are logged at debug level. */
static int dns_stub_send_reply(
                DnsQuery *q,
                int rcode) {

        _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
        bool truncated, edns0_do;
        int r;

        assert(q);

        edns0_do = dns_stub_reply_with_edns0_do(q); /* let's check if we shall reply with EDNS0 DO? */

        /* Start with a packet carrying the original question, sized to what the client can take. This
         * also initializes 'truncated'. */
        r = dns_stub_make_reply_packet(
                        &reply,
                        DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_packet),
                        q->request_packet->question,
                        &truncated);
        if (r < 0)
                return log_debug_errno(r, "Failed to build reply packet: %m");

        dns_stub_suppress_duplicate_section_rrs(q);

        r = dns_stub_add_reply_packet_body(
                        reply,
                        q->reply_answer,
                        q->reply_authoritative,
                        q->reply_additional,
                        edns0_do,
                        &truncated);
        if (r < 0)
                return log_debug_errno(r, "Failed to append reply packet body: %m");

        /* Extra listeners advertise the more conservative datagram size and never return an NSID */
        r = dns_stub_finish_reply_packet(
                        reply,
                        DNS_PACKET_ID(q->request_packet),
                        rcode,
                        truncated,
                        dns_query_fully_authoritative(q),
                        DNS_PACKET_RD(q->request_packet),
                        !!q->request_packet->opt,
                        edns0_do,
                        (DNS_PACKET_AD(q->request_packet) || DNS_PACKET_DO(q->request_packet)) && dns_query_fully_authenticated(q),
                        DNS_PACKET_CD(q->request_packet),
                        q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX,
                        dns_packet_has_nsid_request(q->request_packet) > 0 && !q->stub_listener_extra);
        if (r < 0)
                /* This is the regular reply path, not the failure path, hence say so in the log */
                return log_debug_errno(r, "Failed to finish reply packet: %m");

        return dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);
}
633 
/* Builds a reply to request packet 'p' that carries only the question and response code 'rcode' (no
 * RR sections), and sends it back to the client. Returns 0 on success, a negative error otherwise;
 * failures are logged at debug level. */
static int dns_stub_send_failure(
                Manager *m,
                DnsStubListenerExtra *l,
                DnsStream *s,
                DnsPacket *p,
                int rcode,
                bool authenticated) {

        _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
        bool truncated, has_opt, set_ad, want_nsid;
        uint16_t advertised_size;
        int r;

        assert(m);
        assert(p);

        r = dns_stub_make_reply_packet(
                        &reply,
                        DNS_PACKET_PAYLOAD_SIZE_MAX(p),
                        p->question,
                        &truncated);
        if (r < 0)
                return log_debug_errno(r, "Failed to make failure packet: %m");

        /* Only add an OPT RR if the request had one */
        has_opt = !!p->opt;

        /* AD is only set if the client indicated interest via AD or DO, and the result is authenticated */
        set_ad = (DNS_PACKET_AD(p) || DNS_PACKET_DO(p)) && authenticated;

        /* Extra listeners advertise the more conservative datagram size and never return an NSID */
        advertised_size = l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX;
        want_nsid = dns_packet_has_nsid_request(p) > 0 && !l;

        r = dns_stub_finish_reply_packet(
                        reply,
                        DNS_PACKET_ID(p),
                        rcode,
                        truncated,
                        false,
                        DNS_PACKET_RD(p),
                        has_opt,
                        DNS_PACKET_DO(p),
                        set_ad,
                        DNS_PACKET_CD(p),
                        advertised_size,
                        want_nsid);
        if (r < 0)
                return log_debug_errno(r, "Failed to build failure packet: %m");

        return dns_stub_send(m, l, s, p, reply);
}
675 
/* Creates a patched copy of packet 'original' that looks like a reply to 'request': the ID is taken
 * from the request, the advertised maximum UDP size is replaced by ours, TTLs are lowered by the time
 * passed since reception (if a timestamp is set), and the copy is truncated with the TC bit set if it
 * exceeds what the client accepts. 'original' itself is left unmodified. On success the copy is
 * returned in *ret and 0 is returned, otherwise a negative error. */
static int dns_stub_patch_bypass_reply_packet(
                DnsPacket **ret,       /* Where to place the patched packet */
                DnsPacket *original,   /* The packet to patch */
                DnsPacket *request) {  /* The packet the patched packet shall look like a reply to */
        _cleanup_(dns_packet_unrefp) DnsPacket *copy = NULL;
        size_t client_max;
        int r;

        assert(ret);
        assert(original);
        assert(request);

        r = dns_packet_dup(&copy, original);
        if (r < 0)
                return r;

        /* The packet needs to be extracted, so that the location of the OPT field is known */
        r = dns_packet_extract(copy);
        if (r < 0)
                return r;

        /* Use the ID of the original client request, so that the upstream reply looks like our own
         * reply to it. */
        DNS_PACKET_HEADER(copy)->id = DNS_PACKET_HEADER(request)->id;

        /* If EDNS0 was on, advertise our own maximum datagram size instead of the upstream one */
        r = dns_packet_patch_max_udp_size(copy, ADVERTISE_DATAGRAM_SIZE_MAX);
        if (r < 0)
                return r;

        /* Reduce all TTLs by the time that passed since the datagram was received */
        if (timestamp_is_set(original->timestamp)) {
                r = dns_packet_patch_ttls(copy, original->timestamp);
                if (r < 0)
                        return r;
        }

        /* The upstream connection may have allowed for larger DNS messages than the downstream one. If
         * the reply is larger than what the client supports, truncate it and set the TC bit. */
        client_max = DNS_PACKET_PAYLOAD_SIZE_MAX(request);
        if (copy->size > client_max) {
                log_debug("Artificially truncating stub response, as advertised size of client is smaller than upstream one.");
                dns_packet_truncate(copy, client_max);
                DNS_PACKET_HEADER(copy)->flags = htobe16(be16toh(DNS_PACKET_HEADER(copy)->flags) | DNS_PACKET_FLAG_TC);
        }

        *ret = TAKE_PTR(copy);
        return 0;
}
722 
static void dns_stub_query_complete(DnsQuery *query) {
        /* Completion callback for queries issued on behalf of a stub client: turns the final query state
         * into a reply for the client (or into no reply at all, for timeouts). The query is freed when we
         * return, unless it gets restarted to follow a CNAME redirect, in which case ownership is given
         * up again via TAKE_PTR(). */
        _cleanup_(dns_query_freep) DnsQuery *q = query;
        int r;

        assert(q);
        assert(q->request_packet);

        if (q->question_bypass) {
                /* This is a bypass reply. If so, let's propagate the upstream packet, if we have it and it
                 * is regular DNS. (We can't do this if the upstream packet is LLMNR or mDNS, since the
                 * packets are not 100% compatible.) */

                if (q->answer_full_packet &&
                    q->answer_full_packet->protocol == DNS_PROTOCOL_DNS) {
                        _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;

                        r = dns_stub_patch_bypass_reply_packet(&reply, q->answer_full_packet, q->request_packet);
                        if (r < 0)
                                log_debug_errno(r, "Failed to patch bypass reply packet: %m");
                        else
                                (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_stream, q->request_packet, reply);

                        return;
                }
        }

        /* Take all data from the current reply, and merge it into the three reply sections we are building
         * up. We do this before processing CNAME redirects, so that we gradually build up our sections,
         * and keep adding all RRs in the CNAME chain. */
        r = dns_stub_assign_sections(
                        q,
                        dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
                        dns_stub_reply_with_edns0_do(q));
        if (r < 0)
                return (void) log_debug_errno(r, "Failed to assign sections: %m");

        switch (q->state) {

        case DNS_TRANSACTION_SUCCESS: {
                bool first = true;

                for (;;) {
                        int cname_result;

                        cname_result = dns_query_process_cname_one(q);
                        if (cname_result == -ELOOP) {
                                /* CNAME loop, let's send what we already have. The reply itself is sent by
                                 * the DNS_TRANSACTION_RCODE_FAILURE branch we fall through to once we leave
                                 * the loop; sending it here too would deliver the same reply twice. */
                                log_debug_errno(cname_result, "Detected CNAME loop, returning what we already have.");
                                break;
                        }
                        if (cname_result < 0) {
                                /* Note that leaving the loop here also ends up in the fall-through below,
                                 * i.e. the client still gets a reply built from what was collected so far. */
                                log_debug_errno(cname_result, "Failed to process CNAME: %m");
                                break;
                        }

                        if (cname_result == DNS_QUERY_NOMATCH) {
                                /* This answer doesn't contain any RR that would answer our question
                                 * positively, i.e. neither directly nor via CNAME. */

                                if (first) /* We never followed a CNAME and the answer doesn't match our
                                            * question at all? Then this is final, the empty answer is the
                                            * answer. */
                                        break;

                                /* Otherwise, we already followed a CNAME once within this packet, and the
                                 * packet doesn't answer our question. In that case let's restart the query,
                                 * now with the redirected question. The query stays alive past this call,
                                 * hence we give up ownership below instead of freeing it. */
                                r = dns_query_go(q);
                                if (r < 0)
                                        return (void) log_debug_errno(r, "Failed to restart query: %m");

                                TAKE_PTR(q);
                                return;
                        }

                        r = dns_stub_assign_sections(
                                        q,
                                        dns_query_question_for_protocol(q, DNS_PROTOCOL_DNS),
                                        dns_stub_reply_with_edns0_do(q));
                        if (r < 0)
                                return (void) log_debug_errno(r, "Failed to assign sections: %m");

                        if (cname_result == DNS_QUERY_MATCH) /* A match? Then we are done, let's return what we got */
                                break;

                        /* We followed a CNAME, and collected the RRs that answer the redirected question
                         * successfully. Let's not try to do this again. */
                        assert(cname_result == DNS_QUERY_CNAME);
                        first = false;
                }

                _fallthrough_;
        }

        case DNS_TRANSACTION_RCODE_FAILURE:
                (void) dns_stub_send_reply(q, q->answer_rcode);
                break;

        case DNS_TRANSACTION_NOT_FOUND:
                (void) dns_stub_send_reply(q, DNS_RCODE_NXDOMAIN);
                break;

        case DNS_TRANSACTION_TIMEOUT:
        case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
                /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
                break;

        case DNS_TRANSACTION_NO_SERVERS:
        case DNS_TRANSACTION_INVALID_REPLY:
        case DNS_TRANSACTION_ERRNO:
        case DNS_TRANSACTION_ABORTED:
        case DNS_TRANSACTION_DNSSEC_FAILED:
        case DNS_TRANSACTION_NO_TRUST_ANCHOR:
        case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
        case DNS_TRANSACTION_NETWORK_DOWN:
        case DNS_TRANSACTION_NO_SOURCE:
        case DNS_TRANSACTION_STUB_LOOP:
                (void) dns_stub_send_reply(q, DNS_RCODE_SERVFAIL);
                break;

        case DNS_TRANSACTION_NULL:
        case DNS_TRANSACTION_PENDING:
        case DNS_TRANSACTION_VALIDATING:
        default:
                assert_not_reached();
        }
}
850 
dns_stub_stream_complete(DnsStream * s,int error)851 static int dns_stub_stream_complete(DnsStream *s, int error) {
852         assert(s);
853 
854         log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
855 
856         for (;;) {
857                 DnsQuery *q;
858 
859                 q = set_first(s->queries);
860                 if (!q)
861                         break;
862 
863                 dns_query_free(q);
864         }
865 
866         /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
867          * should be kept as long as the client wants to. */
868         dns_stream_unref(s);
869         return 0;
870 }
871 
static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
        /* Checks an incoming stub query packet and, if it is acceptable, starts a DnsQuery for it.
         *
         *   m  the manager
         *   l  the extra listener the packet arrived on, or NULL for the main stub
         *   s  the TCP stream the packet arrived on, or NULL if it arrived via UDP
         *   p  the request packet
         *
         * Nothing is returned: unacceptable requests are either dropped silently or answered with a
         * failure RCODE right away. */
        uint64_t protocol_flags = SD_RESOLVED_PROTOCOLS_ALL;
        _cleanup_(dns_query_freep) DnsQuery *q = NULL;
        Hashmap **queries_by_packet;
        DnsQuery *existing;
        bool bypass = false;
        int r;

        assert(m);
        assert(p);
        assert(p->protocol == DNS_PROTOCOL_DNS);

        if (!l && /* l == NULL if this is the main stub */
            !address_is_proxy(p->family, &p->destination) && /* don't restrict needlessly for 127.0.0.54 */
            (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
             in_addr_is_localhost(p->family, &p->destination) <= 0)) {
                log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
                return;
        }

        if (manager_packet_from_our_transaction(m, p)) {
                log_debug("Got our own packet looped back, ignoring.");
                return;
        }

        /* Each listener keeps its own table of in-flight requests, used to detect retransmissions. */
        queries_by_packet = l ? &l->queries_by_packet : &m->stub_queries_by_packet;
        existing = hashmap_get(*queries_by_packet, p);
        if (existing && dns_packet_equal(existing->request_packet, p)) {
                log_debug("Got repeat packet from client, ignoring.");
                return;
        }

        r = dns_packet_extract(p);
        if (r < 0) {
                log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
                dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
                return;
        }

        if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
                log_debug("Got EDNS OPT field with unsupported version number.");
                dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
                return;
        }

        if (dns_type_is_obsolete(dns_question_first_key(p->question)->type)) {
                log_debug("Got message with obsolete key type, refusing.");
                dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
                return;
        }

        if (dns_type_is_zone_transer(dns_question_first_key(p->question)->type)) {
                log_debug("Got request for zone transfer, refusing.");
                dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
                return;
        }

        if (!DNS_PACKET_RD(p))  {
                /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
                log_debug("Got request with recursion disabled, refusing.");
                dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
                return;
        }

        r = hashmap_ensure_allocated(queries_by_packet, &stub_packet_hash_ops);
        if (r < 0) {
                log_oom();
                return;
        }

        if (address_is_proxy(p->family, &p->destination)) {
                /* The destination address used to be formatted into a string here that was never used;
                 * that only added an allocation and a way to drop the request, hence it is gone. */
                log_debug("Got request to DNS proxy address 127.0.0.54, enabling bypass logic.");
                bypass = true;
                protocol_flags = SD_RESOLVED_DNS|SD_RESOLVED_NO_ZONE; /* Turn off mDNS/LLMNR for proxy stub. */
        } else if ((DNS_PACKET_DO(p) && DNS_PACKET_CD(p))) {
                log_debug("Got request with DNSSEC checking disabled, enabling bypass logic.");
                bypass = true;
        }

        /* In bypass mode the query is created from the request packet itself, otherwise from the
         * question carried in the packet. */
        if (bypass)
                r = dns_query_new(m, &q, NULL, NULL, p, 0,
                                  protocol_flags|
                                  SD_RESOLVED_NO_CNAME|
                                  SD_RESOLVED_NO_SEARCH|
                                  SD_RESOLVED_NO_VALIDATE|
                                  SD_RESOLVED_REQUIRE_PRIMARY|
                                  SD_RESOLVED_CLAMP_TTL);
        else
                r = dns_query_new(m, &q, p->question, p->question, NULL, 0,
                                  protocol_flags|
                                  SD_RESOLVED_NO_SEARCH|
                                  (DNS_PACKET_DO(p) ? SD_RESOLVED_REQUIRE_PRIMARY : 0)|
                                  SD_RESOLVED_CLAMP_TTL);
        if (r < 0) {
                log_error_errno(r, "Failed to generate query object: %m");
                dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
                return;
        }

        q->request_packet = dns_packet_ref(p);
        q->request_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
        q->stub_listener_extra = l;
        q->complete = dns_stub_query_complete;

        if (s) {
                /* Remember which queries belong to this stream, so that we can cancel them when the stream
                 * is disconnected early */

                r = set_ensure_put(&s->queries, NULL, q);
                if (r < 0) {
                        log_oom();
                        return;
                }
                assert(r > 0);
        }

        /* Add the query to the hash table we use to determine repeat packets now. We don't care about
         * failures here, since in the worst case we'll not recognize duplicate incoming requests, which
         * isn't particularly bad. */
        (void) hashmap_put(*queries_by_packet, q->request_packet, q);

        r = dns_query_go(q);
        if (r < 0) {
                log_error_errno(r, "Failed to start query: %m");
                dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
                return;
        }

        log_debug("Processing query...");

        /* The query is running now, don't free it when leaving this function. */
        TAKE_PTR(q);
}
1009 
static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
        /* Shared I/O handler for stub UDP sockets: receives one packet from fd and hands it to the
         * query processing logic if it validates as a query. l is the extra listener owning the socket,
         * or NULL for the main stub. Returns the manager_recv() result if that is zero or negative,
         * and 0 otherwise. */
        _cleanup_(dns_packet_unrefp) DnsPacket *packet = NULL;
        int r;

        r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &packet);
        if (r <= 0)
                return r;

        if (dns_packet_validate_query(packet) <= 0) {
                log_debug("Invalid DNS stub UDP packet, ignoring.");
                return 0;
        }

        log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(packet));
        dns_stub_process_query(m, l, NULL, packet);

        return 0;
}
1027 
static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* Event source callback for the main stub UDP sockets: userdata is passed on as the manager,
         * and no extra listener is involved. */
        int r;

        r = on_dns_stub_packet_internal(s, fd, revents, userdata, /* l= */ NULL);
        return r;
}
1031 
static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* Event source callback for extra stub UDP sockets: userdata is the extra listener, from which
         * the manager is taken. */
        DnsStubListenerExtra *listener = userdata;

        assert(listener);

        return on_dns_stub_packet_internal(s, fd, revents, listener->manager, listener);
}
1039 
static int on_dns_stub_stream_packet(DnsStream *s, DnsPacket *p) {
        /* Per-packet callback for stub TCP streams: packets that validate as queries are handed to the
         * query processing logic, everything else is ignored. Always returns 0. */
        assert(s);
        assert(s->manager);
        assert(p);

        if (dns_packet_validate_query(p) <= 0) {
                log_debug("Invalid DNS stub TCP packet, ignoring.");
                return 0;
        }

        log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
        dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);

        return 0;
}
1054 
static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
        /* Shared I/O handler for stub TCP listening sockets: accepts one connection and wraps it in a
         * DnsStream. l is the extra listener owning the socket, or NULL for the main stub. Returns 0 on
         * success or if there is nothing to accept right now, a negative errno-style value otherwise. */
        DnsStream *stream;
        int conn_fd, r;

        conn_fd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
        if (conn_fd < 0)
                return ERRNO_IS_ACCEPT_AGAIN(errno) ? 0 : -errno;

        r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, conn_fd, NULL,
                           on_dns_stub_stream_packet, dns_stub_stream_complete, DNS_STREAM_STUB_TIMEOUT_USEC);
        if (r >= 0) {
                stream->stub_listener_extra = l;

                /* The stream reference is intentionally not kept here, the complete callback drops it
                 * later. */
                return 0;
        }

        /* No stream was created, hence the connection fd is still ours to close. */
        safe_close(conn_fd);
        return r;
}
1080 
static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* Event source callback for the main stub TCP listening sockets: userdata is passed on as the
         * manager, and no extra listener is involved. */
        int r;

        r = on_dns_stub_stream_internal(s, fd, revents, userdata, /* l= */ NULL);
        return r;
}
1084 
static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* Event source callback for extra stub TCP listening sockets: userdata is the extra listener,
         * from which the manager is taken. */
        DnsStubListenerExtra *listener = userdata;

        assert(listener);

        return on_dns_stub_stream_internal(s, fd, revents, listener->manager, listener);
}
1091 
static int set_dns_stub_common_socket_options(int fd, int family) {
        /* Applies the socket options shared by all stub sockets: SO_REUSEADDR, plus reception of packet
         * info and TTL for the given family. Stops at the first failure and returns its negative error,
         * or 0 if all three options were set. */
        int r;

        assert(fd >= 0);
        assert(IN_SET(family, AF_INET, AF_INET6));

        r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
        if (r >= 0)
                r = socket_set_recvpktinfo(fd, family, true);
        if (r >= 0)
                r = socket_set_recvttl(fd, family, true);

        return r < 0 ? r : 0;
}
1112 
static int set_dns_stub_common_tcp_socket_options(int fd) {
        /* Applies the TCP-specific options shared by all stub TCP sockets. Both options are best-effort:
         * failures are logged at debug level and otherwise ignored, hence this always returns 0. */
        int fastopen_result, nodelay_result;

        assert(fd >= 0);

        /* Everybody appears to pick qlen=5, let's do the same here. */
        fastopen_result = setsockopt_int(fd, IPPROTO_TCP, TCP_FASTOPEN, 5);
        if (fastopen_result < 0)
                log_debug_errno(fastopen_result, "Failed to enable TCP_FASTOPEN on TCP listening socket, ignoring: %m");

        nodelay_result = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
        if (nodelay_result < 0)
                log_debug_errno(nodelay_result, "Failed to enable TCP_NODELAY mode, ignoring: %m");

        return 0;
}
1128 
static int manager_dns_stub_fd(
                Manager *m,
                int family,
                const union in_addr_union *listen_addr,
                int type) {

        /* Returns the fd of the main (or proxy) stub socket for the given address and socket type,
         * creating, binding and registering it with the event loop on first use.
         *
         *   family       address family of listen_addr
         *   listen_addr  address to bind to; whether it is the proxy address selects which event source
         *                slot of the manager is used
         *   type         SOCK_DGRAM or SOCK_STREAM, anything else yields -EPROTONOSUPPORT
         *
         * Returns the fd (>= 0) on success, a negative errno-style value on failure. */

        sd_event_source **event_source;
        _cleanup_close_ int fd = -1;
        union sockaddr_union sa;
        int r;

        assert(m);
        assert(listen_addr);

        if (type == SOCK_DGRAM)
                event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_udp_event_source : &m->dns_stub_udp_event_source;
        else if (type == SOCK_STREAM)
                event_source = address_is_proxy(family, listen_addr) ? &m->dns_proxy_stub_tcp_event_source : &m->dns_stub_tcp_event_source;
        else
                return -EPROTONOSUPPORT;

        /* Already set up? Then just report the fd of the existing event source. */
        if (*event_source)
                return sd_event_source_get_io_fd(*event_source);

        fd = socket(family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
        if (fd < 0)
                return -errno;

        r = set_dns_stub_common_socket_options(fd, family);
        if (r < 0)
                return r;

        if (type == SOCK_STREAM) {
                r = set_dns_stub_common_tcp_socket_options(fd);
                if (r < 0)
                        return r;
        }

        /* Set slightly different socket options for the non-proxy and the proxy binding. The former we want
         * to be accessible only from the local host, for the latter it's OK if people use NAT redirects or
         * so to redirect external traffic to it. */

        if (!address_is_proxy(family, listen_addr)) {
                /* Make sure no traffic from outside the local host can leak onto this socket */
                r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
                if (r < 0)
                        return r;

                r = socket_set_ttl(fd, family, 1);
                if (r < 0)
                        return r;
        } else if (type == SOCK_DGRAM) {
                /* Turn off Path MTU Discovery for UDP, for security reasons. See socket_disable_pmtud() for
                 * a longer discussion. (We only do this for sockets that are potentially externally
                 * accessible, i.e. the proxy stub one. For the non-proxy one we instead set the TTL to 1,
                 * see above, so that packets don't get routed at all.) */
                r = socket_disable_pmtud(fd, family);
                if (r < 0)
                        log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");

                r = socket_set_recvfragsize(fd, family, true);
                if (r < 0)
                        log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
        }

        r = sockaddr_set_in_addr(&sa, family, listen_addr, 53);
        if (r < 0)
                return r;

        /* Pass the address length that matches the family: a hard-coded sizeof(sa.in) is too short for
         * an AF_INET6 address. */
        if (bind(fd, &sa.sa, family == AF_INET ? sizeof(sa.in) : sizeof(sa.in6)) < 0)
                return -errno;

        if (type == SOCK_STREAM &&
            listen(fd, SOMAXCONN) < 0)
                return -errno;

        r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
                            type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
                            m);
        if (r < 0)
                return r;

        /* Hand ownership of the fd to the event source. */
        r = sd_event_source_set_io_fd_own(*event_source, true);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(*event_source,
                                               type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");

        /* The event source owns the fd now, hence disarm our own cleanup handler. */
        return TAKE_FD(fd);
}
1220 
static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
        /* Returns the fd of the UDP or TCP socket (selected by type) of an extra stub listener,
         * creating, binding and registering it with the event loop on first use. Returns the fd on
         * success; on failure a warning is logged and a negative errno-style value is returned. */
        _cleanup_free_ char *pretty = NULL;
        _cleanup_close_ int fd = -1;
        sd_event_source **event_source;
        union sockaddr_union sa;
        const char *proto;
        bool udp;
        int r;

        assert(m);
        assert(l);
        assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));

        udp = type == SOCK_DGRAM;
        proto = udp ? "UDP" : "TCP";

        /* Already set up? Then just report the fd of the existing event source. */
        event_source = udp ? &l->udp_event_source : &l->tcp_event_source;
        if (*event_source)
                return sd_event_source_get_io_fd(*event_source);

        if (l->family == AF_INET)
                sa = (union sockaddr_union) {
                        .in.sin_family = l->family,
                        .in.sin_port = htobe16(dns_stub_listener_extra_port(l)),
                        .in.sin_addr = l->address.in,
                };
        else
                sa = (union sockaddr_union) {
                        .in6.sin6_family = l->family,
                        .in6.sin6_port = htobe16(dns_stub_listener_extra_port(l)),
                        .in6.sin6_addr = l->address.in6,
                };

        fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
        if (fd < 0) {
                r = -errno;
                goto fail;
        }

        r = set_dns_stub_common_socket_options(fd, l->family);
        if (r < 0)
                goto fail;

        if (type == SOCK_STREAM) {
                r = set_dns_stub_common_tcp_socket_options(fd);
                if (r < 0)
                        goto fail;
        }

        /* IP_TTL is deliberately left alone for extra listeners: their address may not be local, and
         * people may then want a TTL > 1. */

        r = socket_set_freebind(fd, l->family, true);
        if (r < 0)
                goto fail;

        if (udp) {
                /* Both of these are best-effort, failures are only logged. */
                r = socket_disable_pmtud(fd, l->family);
                if (r < 0)
                        log_debug_errno(r, "Failed to disable UDP PMTUD, ignoring: %m");

                r = socket_set_recvfragsize(fd, l->family, true);
                if (r < 0)
                        log_debug_errno(r, "Failed to enable fragment size reception, ignoring: %m");
        }

        r = RET_NERRNO(bind(fd, &sa.sa, SOCKADDR_LEN(sa)));
        if (r < 0)
                goto fail;

        if (type == SOCK_STREAM) {
                r = RET_NERRNO(listen(fd, SOMAXCONN));
                if (r < 0)
                        goto fail;
        }

        r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
                            udp ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
                            l);
        if (r < 0)
                goto fail;

        /* Hand ownership of the fd to the event source. */
        r = sd_event_source_set_io_fd_own(*event_source, true);
        if (r < 0)
                goto fail;

        (void) sd_event_source_set_description(*event_source,
                                               udp ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");

        if (DEBUG_LOGGING) {
                (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
                log_debug("Listening on %s socket %s.", proto, strnull(pretty));
        }

        /* The event source owns the fd now, hence disarm our own cleanup handler. */
        return TAKE_FD(fd);

fail:
        assert(r < 0);
        (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
        return log_warning_errno(r,
                                 r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
                                                    "Failed to listen on %s socket %s: %m",
                                 proto,
                                 strnull(pretty));
}
1322 
int manager_dns_stub_start(Manager *m) {
        /* Sets up the main and proxy stub sockets according to the configured listener mode, followed
         * by the sockets of all extra stub listeners. If a main/proxy socket fails with EADDRINUSE or
         * EPERM, a warning is logged and the main stub sockets are shut down again; any other failure
         * there is returned as error. Failures of extra listeners are ignored here. Returns 0 on
         * success. */
        static const struct {
                uint32_t addr;
                int socket_type;
        } stub_sockets[] = {
                { INADDR_DNS_STUB,       SOCK_DGRAM  },
                { INADDR_DNS_STUB,       SOCK_STREAM },
                { INADDR_DNS_PROXY_STUB, SOCK_DGRAM  },
                { INADDR_DNS_PROXY_STUB, SOCK_STREAM },
        };
        DnsStubListenerExtra *l;
        int r;

        assert(m);

        if (m->dns_stub_listener_mode != DNS_STUB_LISTENER_NO) {
                log_debug("Creating stub listener using %s.",
                          m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
                          m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
                          "UDP/TCP");

                for (size_t i = 0; i < ELEMENTSOF(stub_sockets); i++) {
                        _cleanup_free_ char *busy_socket = NULL;
                        union in_addr_union a = {
                                .in.s_addr = htobe32(stub_sockets[i].addr),
                        };

                        /* Skip the socket types the configured mode doesn't cover. */
                        if ((m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP && stub_sockets[i].socket_type == SOCK_STREAM) ||
                            (m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP && stub_sockets[i].socket_type == SOCK_DGRAM))
                                continue;

                        r = manager_dns_stub_fd(m, AF_INET, &a, stub_sockets[i].socket_type);
                        if (r >= 0)
                                continue;

                        /* Setting up the socket failed, format its name for the log message. */
                        if (asprintf(&busy_socket,
                                     "%s socket " IPV4_ADDRESS_FMT_STR ":53",
                                     stub_sockets[i].socket_type == SOCK_DGRAM ? "UDP" : "TCP",
                                     IPV4_ADDRESS_FMT_VAL(a.in)) < 0)
                                return log_oom();

                        if (!IN_SET(r, -EADDRINUSE, -EPERM))
                                return log_error_errno(r, "Failed to listen on %s: %m", busy_socket);

                        /* Address taken or not permitted: don't fail, just go without the local stub. */
                        log_warning_errno(r,
                                          r == -EADDRINUSE ? "Another process is already listening on %s.\n"
                                          "Turning off local DNS stub support." :
                                          "Failed to listen on %s: %m.\n"
                                          "Turning off local DNS stub support.",
                                          busy_socket);
                        manager_dns_stub_stop(m);
                        break;
                }
        } else
                log_debug("Not creating stub listener.");

        if (ordered_set_isempty(m->dns_extra_stub_listeners))
                return 0;

        log_debug("Creating extra stub listeners.");

        ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
                if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
                        (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
                if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
                        (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
        }

        return 0;
}
1397 
void manager_dns_stub_stop(Manager *m) {
        /* Disables and releases the event sources of the main and proxy stub sockets, resetting each
         * manager field to whatever sd_event_source_disable_unref() returns. The four sources are
         * independent of each other; they are grouped by transport here. Extra stub listeners are not
         * touched. */
        assert(m);

        /* UDP */
        m->dns_stub_udp_event_source = sd_event_source_disable_unref(m->dns_stub_udp_event_source);
        m->dns_proxy_stub_udp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_udp_event_source);

        /* TCP */
        m->dns_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_stub_tcp_event_source);
        m->dns_proxy_stub_tcp_event_source = sd_event_source_disable_unref(m->dns_proxy_stub_tcp_event_source);
}
1406 
1407 static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
1408         [DNS_STUB_LISTENER_NO]  = "no",
1409         [DNS_STUB_LISTENER_UDP] = "udp",
1410         [DNS_STUB_LISTENER_TCP] = "tcp",
1411         [DNS_STUB_LISTENER_YES] = "yes",
1412 };
1413 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);
1414