1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <malloc.h>
4 #include <netinet/in.h>
5 #include <stdbool.h>
6 #include <unistd.h>
7 
8 #include "sd-netlink.h"
9 
10 #include "alloc-util.h"
11 #include "fd-util.h"
12 #include "format-util.h"
13 #include "io-util.h"
14 #include "netlink-internal.h"
15 #include "netlink-types.h"
16 #include "netlink-util.h"
17 #include "socket-util.h"
18 #include "util.h"
19 
/* Creates a raw, non-blocking, close-on-exec AF_NETLINK socket for the given netlink protocol
 * ("family"). On success the result of fd_move_above_stdio() for the new descriptor is returned;
 * if socket() fails, the negated errno is returned instead. */
int socket_open(int family) {
        int sock = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family);

        return sock < 0 ? -errno : fd_move_above_stdio(sock);
}
29 
/* Asks the kernel which multicast groups the socket is currently a member of and registers each
 * of them in nl->broadcast_group_refs with a reference count of 1.
 *
 * If the kernel rejects NETLINK_LIST_MEMBERSHIPS with ENOPROTOOPT, the memberships cannot be
 * determined; in that case nl->broadcast_group_dont_leave is set and 0 is returned.
 *
 * Returns 0 on success, or a negative errno-style value on failure (-EIO if the reported
 * length changed between the two queries). */
static int broadcast_groups_get(sd_netlink *nl) {
        _cleanup_free_ uint32_t *bitmap = NULL;
        const size_t bits_per_word = sizeof(uint32_t) * 8;
        socklen_t len = 0, expected_len;
        int r;

        assert(nl);
        assert(nl->fd >= 0);

        /* First query, without a buffer: only the required length is of interest. */
        if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len) < 0) {
                if (errno == ENOPROTOOPT) {
                        nl->broadcast_group_dont_leave = true;
                        return 0;
                }

                return -errno;
        }

        if (len == 0)
                return 0;

        /* NOTE(review): one uint32_t is allocated per unit of "len"; if the kernel reports the
         * length in bytes this is larger than needed (the surplus stays zeroed) -- confirm. */
        bitmap = new0(uint32_t, len);
        if (!bitmap)
                return -ENOMEM;

        expected_len = len;

        /* Second query: fetch the actual membership bitmap. */
        if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, bitmap, &len) < 0)
                return -errno;

        if (len != expected_len)
                return -EIO;

        for (unsigned word = 0; word < len; word++) {
                for (unsigned bit = 0; bit < bits_per_word; bit++) {
                        unsigned group;

                        if (!(bitmap[word] & (1U << bit)))
                                continue;

                        /* group numbers are 1-based, bit 0 of word 0 is group 1 */
                        group = word * bits_per_word + bit + 1;

                        r = hashmap_ensure_put(&nl->broadcast_group_refs, NULL, UINT_TO_PTR(group), UINT_TO_PTR(1));
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
73 
/* Prepares the netlink socket for use: enables NETLINK_PKTINFO, binds the socket to
 * nl->sockaddr, reads the effective address back into nl->sockaddr and finally records the
 * multicast groups the socket already belongs to.
 *
 * Returns the result of broadcast_groups_get() on success, or a negative errno-style value
 * if one of the earlier steps fails. */
int socket_bind(sd_netlink *nl) {
        socklen_t addrlen = sizeof(nl->sockaddr);
        int r;

        r = setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, true);
        if (r < 0)
                return r;

        if (bind(nl->fd, &nl->sockaddr.sa, addrlen) < 0) {
                /* EINVAL is tolerated so that an already bound socket can be passed in */
                if (errno != EINVAL)
                        return -errno;
        }

        if (getsockname(nl->fd, &nl->sockaddr.sa, &addrlen) < 0)
                return -errno;

        return broadcast_groups_get(nl);
}
93 
/* Looks up the reference count stored for "group" in nl->broadcast_group_refs.
 * Presumably yields 0 when the group has no entry (callers treat 0 as "not referenced"). */
static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) {
        void *stored;

        assert(nl);

        stored = hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group));

        return PTR_TO_UINT(stored);
}
99 
/* Stores "n_ref" as the reference count of "group" in nl->broadcast_group_refs, allocating the
 * hashmap first if necessary. Returns the result of hashmap_replace(), or the negative error
 * from hashmap_ensure_allocated() if the map could not be set up. */
static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) {
        int r;

        assert(nl);

        r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
        if (r >= 0)
                r = hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref));

        return r;
}
111 
/* Subscribes the socket to the multicast group "group" (which must be non-zero) via
 * NETLINK_ADD_MEMBERSHIP. Returns the result of setsockopt_int(). */
static int broadcast_group_join(sd_netlink *nl, unsigned group) {
        int r;

        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        /* netlink(7) documents the NETLINK_ADD_MEMBERSHIP argument as "int", although groups are
         * tracked as "unsigned" here */
        r = setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, group);

        return r;
}
120 
/* Takes a reference on the multicast group "group". The first reference joins the group on the
 * socket; further references only bump the counter kept in nl->broadcast_group_refs.
 *
 * Returns 0 if the group was already referenced, the result of broadcast_group_join() if this
 * was the first reference, or a negative errno-style value if the counter could not be stored.
 * If joining fails, the counter is restored to its previous value so that a later call retries
 * the join instead of wrongly assuming the socket is already a member. */
int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) {
        unsigned n_ref;
        int r;

        assert(nl);

        n_ref = broadcast_group_get_ref(nl, group);

        n_ref++;

        r = broadcast_group_set_ref(nl, group, n_ref);
        if (r < 0)
                return r;

        if (n_ref > 1)
                /* already in the group */
                return 0;

        r = broadcast_group_join(nl, group);
        if (r < 0)
                /* We are not a member after all: undo the reference taken above. The entry
                 * exists at this point, hence replacing its value is not expected to fail. */
                (void) broadcast_group_set_ref(nl, group, n_ref - 1);

        return r;
}
141 
/* Unsubscribes the socket from the multicast group "group" (which must be non-zero) via
 * NETLINK_DROP_MEMBERSHIP. This is a no-op returning 0 when nl->broadcast_group_dont_leave is
 * set; otherwise the result of setsockopt_int() is returned. */
static int broadcast_group_leave(sd_netlink *nl, unsigned group) {
        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        if (!nl->broadcast_group_dont_leave)
                /* netlink(7) documents the NETLINK_DROP_MEMBERSHIP argument as "int", although
                 * groups are tracked as "unsigned" here */
                return setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, group);

        return 0;
}
153 
/* Drops one reference on the multicast group "group". When the last reference goes away the
 * socket leaves the group.
 *
 * Returns 0 if the group was not referenced or references remain, the result of
 * broadcast_group_leave() if this was the last reference, or a negative errno-style value if
 * the counter could not be stored. */
int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) {
        unsigned current;
        int r;

        assert(nl);

        current = broadcast_group_get_ref(nl, group);
        if (current == 0)
                return 0;

        r = broadcast_group_set_ref(nl, group, current - 1);
        if (r < 0)
                return r;

        if (current > 1)
                /* other users still hold references */
                return 0;

        return broadcast_group_leave(nl, group);
}
176 
177 /* returns the number of bytes sent, or a negative error code */
socket_write_message(sd_netlink * nl,sd_netlink_message * m)178 int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
179         union sockaddr_union addr = {
180                 .nl.nl_family = AF_NETLINK,
181         };
182         ssize_t k;
183 
184         assert(nl);
185         assert(m);
186         assert(m->hdr);
187 
188         k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len, 0, &addr.sa, sizeof(addr));
189         if (k < 0)
190                 return -errno;
191 
192         return k;
193 }
194 
/* Sends "msgcount" messages from the array "m" with a single writev() call, one iovec per
 * message. Every message must have a header with a non-zero nlmsg_len.
 * Returns the number of bytes written, -ENOMEM if the iovec array cannot be allocated, or the
 * negated errno if writev() fails. */
int socket_writev_message(sd_netlink *nl, sd_netlink_message **m, size_t msgcount) {
        _cleanup_free_ struct iovec *vec = NULL;
        ssize_t written;

        assert(nl);
        assert(m);
        assert(msgcount > 0);

        vec = new(struct iovec, msgcount);
        if (!vec)
                return -ENOMEM;

        for (size_t idx = 0; idx < msgcount; idx++) {
                sd_netlink_message *msg = m[idx];

                assert(msg->hdr);
                assert(msg->hdr->nlmsg_len > 0);

                vec[idx] = IOVEC_MAKE(msg->hdr, msg->hdr->nlmsg_len);
        }

        written = writev(nl->fd, vec, msgcount);

        return written < 0 ? -errno : written;
}
220 
socket_recv_message(int fd,struct iovec * iov,uint32_t * ret_mcast_group,bool peek)221 static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_group, bool peek) {
222         union sockaddr_union sender;
223         CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct nl_pktinfo))) control;
224         struct msghdr msg = {
225                 .msg_iov = iov,
226                 .msg_iovlen = 1,
227                 .msg_name = &sender,
228                 .msg_namelen = sizeof(sender),
229                 .msg_control = &control,
230                 .msg_controllen = sizeof(control),
231         };
232         ssize_t n;
233 
234         assert(fd >= 0);
235         assert(iov);
236 
237         n = recvmsg_safe(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0));
238         if (n < 0) {
239                 if (n == -ENOBUFS)
240                         return log_debug_errno(n, "sd-netlink: kernel receive buffer overrun");
241                 if (ERRNO_IS_TRANSIENT(n))
242                         return 0;
243                 return (int) n;
244         }
245 
246         if (sender.nl.nl_pid != 0) {
247                 /* not from the kernel, ignore */
248                 log_debug("sd-netlink: ignoring message from PID %"PRIu32, sender.nl.nl_pid);
249 
250                 if (peek) {
251                         /* drop the message */
252                         n = recvmsg_safe(fd, &msg, 0);
253                         if (n < 0)
254                                 return (int) n;
255                 }
256 
257                 return 0;
258         }
259 
260         if (ret_mcast_group) {
261                 struct nl_pktinfo *pi;
262 
263                 pi = CMSG_FIND_DATA(&msg, SOL_NETLINK, NETLINK_PKTINFO, struct nl_pktinfo);
264                 if (pi)
265                         *ret_mcast_group = pi->group;
266                 else
267                         *ret_mcast_group = 0;
268         }
269 
270         return (int) n;
271 }
272 
273 /* On success, the number of bytes received is returned and *ret points to the received message
274  * which has a valid header and the correct size.
275  * If nothing useful was received 0 is returned.
276  * On failure, a negative error code is returned.
277  */
socket_read_message(sd_netlink * nl)278 int socket_read_message(sd_netlink *nl) {
279         _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL;
280         bool multi_part = false, done = false;
281         size_t len, allocated;
282         struct iovec iov = {};
283         uint32_t group = 0;
284         unsigned i = 0;
285         int r;
286 
287         assert(nl);
288         assert(nl->rbuffer);
289 
290         /* read nothing, just get the pending message size */
291         r = socket_recv_message(nl->fd, &iov, NULL, true);
292         if (r <= 0)
293                 return r;
294         else
295                 len = (size_t) r;
296 
297         /* make room for the pending message */
298         if (!greedy_realloc((void**) &nl->rbuffer, len, sizeof(uint8_t)))
299                 return -ENOMEM;
300 
301         allocated = MALLOC_SIZEOF_SAFE(nl->rbuffer);
302         iov = IOVEC_MAKE(nl->rbuffer, allocated);
303 
304         /* read the pending message */
305         r = socket_recv_message(nl->fd, &iov, &group, false);
306         if (r <= 0)
307                 return r;
308         else
309                 len = (size_t) r;
310 
311         if (len > allocated)
312                 /* message did not fit in read buffer */
313                 return -EIO;
314 
315         if (NLMSG_OK(nl->rbuffer, len) && nl->rbuffer->nlmsg_flags & NLM_F_MULTI) {
316                 multi_part = true;
317 
318                 for (i = 0; i < nl->rqueue_partial_size; i++)
319                         if (message_get_serial(nl->rqueue_partial[i]) ==
320                             nl->rbuffer->nlmsg_seq) {
321                                 first = nl->rqueue_partial[i];
322                                 break;
323                         }
324         }
325 
326         for (struct nlmsghdr *new_msg = nl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) {
327                 _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
328                 size_t size;
329 
330                 if (group == 0 && new_msg->nlmsg_pid != nl->sockaddr.nl.nl_pid)
331                         /* not broadcast and not for us */
332                         continue;
333 
334                 if (new_msg->nlmsg_type == NLMSG_NOOP)
335                         /* silently drop noop messages */
336                         continue;
337 
338                 if (new_msg->nlmsg_type == NLMSG_DONE) {
339                         /* finished reading multi-part message */
340                         done = true;
341 
342                         /* if first is not defined, put NLMSG_DONE into the receive queue. */
343                         if (first)
344                                 continue;
345                 }
346 
347                 /* check that we support this message type */
348                 r = type_system_root_get_type_system_and_header_size(nl, new_msg->nlmsg_type, NULL, &size);
349                 if (r < 0) {
350                         if (r == -EOPNOTSUPP)
351                                 log_debug("sd-netlink: ignored message with unknown type: %i",
352                                           new_msg->nlmsg_type);
353 
354                         continue;
355                 }
356 
357                 /* check that the size matches the message type */
358                 if (new_msg->nlmsg_len < NLMSG_LENGTH(size)) {
359                         log_debug("sd-netlink: message is shorter than expected, dropping");
360                         continue;
361                 }
362 
363                 r = message_new_empty(nl, &m);
364                 if (r < 0)
365                         return r;
366 
367                 m->multicast_group = group;
368                 m->hdr = memdup(new_msg, new_msg->nlmsg_len);
369                 if (!m->hdr)
370                         return -ENOMEM;
371 
372                 /* seal and parse the top-level message */
373                 r = sd_netlink_message_rewind(m, nl);
374                 if (r < 0)
375                         return r;
376 
377                 /* push the message onto the multi-part message stack */
378                 if (first)
379                         m->next = first;
380                 first = TAKE_PTR(m);
381         }
382 
383         if (len > 0)
384                 log_debug("sd-netlink: discarding %zu bytes of incoming message", len);
385 
386         if (!first)
387                 return 0;
388 
389         if (!multi_part || done) {
390                 /* we got a complete message, push it on the read queue */
391                 r = netlink_rqueue_make_room(nl);
392                 if (r < 0)
393                         return r;
394 
395                 nl->rqueue[nl->rqueue_size++] = TAKE_PTR(first);
396 
397                 if (multi_part && (i < nl->rqueue_partial_size)) {
398                         /* remove the message form the partial read queue */
399                         memmove(nl->rqueue_partial + i, nl->rqueue_partial + i + 1,
400                                 sizeof(sd_netlink_message*) * (nl->rqueue_partial_size - i - 1));
401                         nl->rqueue_partial_size--;
402                 }
403 
404                 return 1;
405         } else {
406                 /* we only got a partial multi-part message, push it on the
407                    partial read queue */
408                 if (i < nl->rqueue_partial_size)
409                         nl->rqueue_partial[i] = TAKE_PTR(first);
410                 else {
411                         r = netlink_rqueue_partial_make_room(nl);
412                         if (r < 0)
413                                 return r;
414 
415                         nl->rqueue_partial[nl->rqueue_partial_size++] = TAKE_PTR(first);
416                 }
417 
418                 return 0;
419         }
420 }
421