1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * net/dccp/proto.c
4 *
5 * An implementation of the DCCP protocol
6 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 */
8
9 #include <linux/dccp.h>
10 #include <linux/module.h>
11 #include <linux/types.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/if_arp.h>
18 #include <linux/init.h>
19 #include <linux/random.h>
20 #include <linux/slab.h>
21 #include <net/checksum.h>
22
23 #include <net/inet_sock.h>
24 #include <net/inet_common.h>
25 #include <net/sock.h>
26 #include <net/xfrm.h>
27
28 #include <asm/ioctls.h>
29 #include <linux/spinlock.h>
30 #include <linux/timer.h>
31 #include <linux/delay.h>
32 #include <linux/poll.h>
33
34 #include "ccid.h"
35 #include "dccp.h"
36 #include "feat.h"
37
38 #define CREATE_TRACE_POINTS
39 #include "trace.h"
40
/* Per-CPU DCCP MIB counters; allocated by dccp_mib_init(). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Per-CPU counter that dccp_close() increments for each orphaned socket. */
DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);

/* Established/bind hash tables for DCCP; sized and allocated in dccp_init(). */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * Maximum TX queue length in packets; 0 means no limit.
 * Copied into each new socket's dccps_tx_qlen by dccp_init_sock().
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
53
54 #ifdef CONFIG_IP_DCCP_DEBUG
dccp_state_name(const int state)55 static const char *dccp_state_name(const int state)
56 {
57 static const char *const dccp_state_names[] = {
58 [DCCP_OPEN] = "OPEN",
59 [DCCP_REQUESTING] = "REQUESTING",
60 [DCCP_PARTOPEN] = "PARTOPEN",
61 [DCCP_LISTEN] = "LISTEN",
62 [DCCP_RESPOND] = "RESPOND",
63 [DCCP_CLOSING] = "CLOSING",
64 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
65 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
66 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67 [DCCP_TIME_WAIT] = "TIME_WAIT",
68 [DCCP_CLOSED] = "CLOSED",
69 };
70
71 if (state >= DCCP_MAX_STATES)
72 return "INVALID STATE!";
73 else
74 return dccp_state_names[state];
75 }
76 #endif
77
dccp_set_state(struct sock * sk,const int state)78 void dccp_set_state(struct sock *sk, const int state)
79 {
80 const int oldstate = sk->sk_state;
81
82 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
83 dccp_state_name(oldstate), dccp_state_name(state));
84 WARN_ON(state == oldstate);
85
86 switch (state) {
87 case DCCP_OPEN:
88 if (oldstate != DCCP_OPEN)
89 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
90 /* Client retransmits all Confirm options until entering OPEN */
91 if (oldstate == DCCP_PARTOPEN)
92 dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
93 break;
94
95 case DCCP_CLOSED:
96 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
97 oldstate == DCCP_CLOSING)
98 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
99
100 sk->sk_prot->unhash(sk);
101 if (inet_csk(sk)->icsk_bind_hash != NULL &&
102 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
103 inet_put_port(sk);
104 fallthrough;
105 default:
106 if (oldstate == DCCP_OPEN)
107 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
108 }
109
110 /* Change state AFTER socket is unhashed to avoid closed
111 * socket sitting in hash tables.
112 */
113 inet_sk_set_state(sk, state);
114 }
115
116 EXPORT_SYMBOL_GPL(dccp_set_state);
117
dccp_finish_passive_close(struct sock * sk)118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120 switch (sk->sk_state) {
121 case DCCP_PASSIVE_CLOSE:
122 /* Node (client or server) has received Close packet. */
123 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124 dccp_set_state(sk, DCCP_CLOSED);
125 break;
126 case DCCP_PASSIVE_CLOSEREQ:
127 /*
128 * Client received CloseReq. We set the `active' flag so that
129 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130 */
131 dccp_send_close(sk, 1);
132 dccp_set_state(sk, DCCP_CLOSING);
133 }
134 }
135
dccp_done(struct sock * sk)136 void dccp_done(struct sock *sk)
137 {
138 dccp_set_state(sk, DCCP_CLOSED);
139 dccp_clear_xmit_timers(sk);
140
141 sk->sk_shutdown = SHUTDOWN_MASK;
142
143 if (!sock_flag(sk, SOCK_DEAD))
144 sk->sk_state_change(sk);
145 else
146 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
dccp_packet_name(const int type)151 const char *dccp_packet_name(const int type)
152 {
153 static const char *const dccp_packet_names[] = {
154 [DCCP_PKT_REQUEST] = "REQUEST",
155 [DCCP_PKT_RESPONSE] = "RESPONSE",
156 [DCCP_PKT_DATA] = "DATA",
157 [DCCP_PKT_ACK] = "ACK",
158 [DCCP_PKT_DATAACK] = "DATAACK",
159 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160 [DCCP_PKT_CLOSE] = "CLOSE",
161 [DCCP_PKT_RESET] = "RESET",
162 [DCCP_PKT_SYNC] = "SYNC",
163 [DCCP_PKT_SYNCACK] = "SYNCACK",
164 };
165
166 if (type >= DCCP_NR_PKT_TYPES)
167 return "INVALID";
168 else
169 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
dccp_sk_destruct(struct sock * sk)174 static void dccp_sk_destruct(struct sock *sk)
175 {
176 struct dccp_sock *dp = dccp_sk(sk);
177
178 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179 dp->dccps_hc_tx_ccid = NULL;
180 inet_sock_destruct(sk);
181 }
182
dccp_init_sock(struct sock * sk,const __u8 ctl_sock_initialized)183 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
184 {
185 struct dccp_sock *dp = dccp_sk(sk);
186 struct inet_connection_sock *icsk = inet_csk(sk);
187
188 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
189 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
190 sk->sk_state = DCCP_CLOSED;
191 sk->sk_write_space = dccp_write_space;
192 sk->sk_destruct = dccp_sk_destruct;
193 icsk->icsk_sync_mss = dccp_sync_mss;
194 dp->dccps_mss_cache = 536;
195 dp->dccps_rate_last = jiffies;
196 dp->dccps_role = DCCP_ROLE_UNDEFINED;
197 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
198 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
199
200 dccp_init_xmit_timers(sk);
201
202 INIT_LIST_HEAD(&dp->dccps_featneg);
203 /* control socket doesn't need feat nego */
204 if (likely(ctl_sock_initialized))
205 return dccp_feat_init(sk);
206 return 0;
207 }
208
209 EXPORT_SYMBOL_GPL(dccp_init_sock);
210
dccp_destroy_sock(struct sock * sk)211 void dccp_destroy_sock(struct sock *sk)
212 {
213 struct dccp_sock *dp = dccp_sk(sk);
214
215 __skb_queue_purge(&sk->sk_write_queue);
216 if (sk->sk_send_head != NULL) {
217 kfree_skb(sk->sk_send_head);
218 sk->sk_send_head = NULL;
219 }
220
221 /* Clean up a referenced DCCP bind bucket. */
222 if (inet_csk(sk)->icsk_bind_hash != NULL)
223 inet_put_port(sk);
224
225 kfree(dp->dccps_service_list);
226 dp->dccps_service_list = NULL;
227
228 if (dp->dccps_hc_rx_ackvec != NULL) {
229 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
230 dp->dccps_hc_rx_ackvec = NULL;
231 }
232 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
233 dp->dccps_hc_rx_ccid = NULL;
234
235 /* clean up feature negotiation state */
236 dccp_feat_list_purge(&dp->dccps_featneg);
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
240
dccp_need_reset(int state)241 static inline int dccp_need_reset(int state)
242 {
243 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
244 state != DCCP_REQUESTING;
245 }
246
dccp_disconnect(struct sock * sk,int flags)247 int dccp_disconnect(struct sock *sk, int flags)
248 {
249 struct inet_connection_sock *icsk = inet_csk(sk);
250 struct inet_sock *inet = inet_sk(sk);
251 struct dccp_sock *dp = dccp_sk(sk);
252 const int old_state = sk->sk_state;
253
254 if (old_state != DCCP_CLOSED)
255 dccp_set_state(sk, DCCP_CLOSED);
256
257 /*
258 * This corresponds to the ABORT function of RFC793, sec. 3.8
259 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
260 */
261 if (old_state == DCCP_LISTEN) {
262 inet_csk_listen_stop(sk);
263 } else if (dccp_need_reset(old_state)) {
264 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
265 sk->sk_err = ECONNRESET;
266 } else if (old_state == DCCP_REQUESTING)
267 sk->sk_err = ECONNRESET;
268
269 dccp_clear_xmit_timers(sk);
270 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
271 dp->dccps_hc_rx_ccid = NULL;
272
273 __skb_queue_purge(&sk->sk_receive_queue);
274 __skb_queue_purge(&sk->sk_write_queue);
275 if (sk->sk_send_head != NULL) {
276 __kfree_skb(sk->sk_send_head);
277 sk->sk_send_head = NULL;
278 }
279
280 inet->inet_dport = 0;
281
282 inet_bhash2_reset_saddr(sk);
283
284 sk->sk_shutdown = 0;
285 sock_reset_flag(sk, SOCK_DONE);
286
287 icsk->icsk_backoff = 0;
288 inet_csk_delack_init(sk);
289 __sk_dst_reset(sk);
290
291 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
292
293 sk_error_report(sk);
294 return 0;
295 }
296
297 EXPORT_SYMBOL_GPL(dccp_disconnect);
298
299 /*
300 * Wait for a DCCP event.
301 *
302 * Note that we don't need to lock the socket, as the upper poll layers
303 * take care of normal races (between the test and the event) and we don't
304 * go look at any of the socket buffers directly.
305 */
dccp_poll(struct file * file,struct socket * sock,poll_table * wait)306 __poll_t dccp_poll(struct file *file, struct socket *sock,
307 poll_table *wait)
308 {
309 __poll_t mask;
310 struct sock *sk = sock->sk;
311
312 sock_poll_wait(file, sock, wait);
313 if (sk->sk_state == DCCP_LISTEN)
314 return inet_csk_listen_poll(sk);
315
316 /* Socket is not locked. We are protected from async events
317 by poll logic and correct handling of state changes
318 made by another threads is impossible in any case.
319 */
320
321 mask = 0;
322 if (sk->sk_err)
323 mask = EPOLLERR;
324
325 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
326 mask |= EPOLLHUP;
327 if (sk->sk_shutdown & RCV_SHUTDOWN)
328 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
329
330 /* Connected? */
331 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
332 if (atomic_read(&sk->sk_rmem_alloc) > 0)
333 mask |= EPOLLIN | EPOLLRDNORM;
334
335 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
336 if (sk_stream_is_writeable(sk)) {
337 mask |= EPOLLOUT | EPOLLWRNORM;
338 } else { /* send SIGIO later */
339 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
340 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
341
342 /* Race breaker. If space is freed after
343 * wspace test but before the flags are set,
344 * IO signal will be lost.
345 */
346 if (sk_stream_is_writeable(sk))
347 mask |= EPOLLOUT | EPOLLWRNORM;
348 }
349 }
350 }
351 return mask;
352 }
353
354 EXPORT_SYMBOL_GPL(dccp_poll);
355
dccp_ioctl(struct sock * sk,int cmd,unsigned long arg)356 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
357 {
358 int rc = -ENOTCONN;
359
360 lock_sock(sk);
361
362 if (sk->sk_state == DCCP_LISTEN)
363 goto out;
364
365 switch (cmd) {
366 case SIOCOUTQ: {
367 int amount = sk_wmem_alloc_get(sk);
368 /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
369 * always 0, comparably to UDP.
370 */
371
372 rc = put_user(amount, (int __user *)arg);
373 }
374 break;
375 case SIOCINQ: {
376 struct sk_buff *skb;
377 unsigned long amount = 0;
378
379 skb = skb_peek(&sk->sk_receive_queue);
380 if (skb != NULL) {
381 /*
382 * We will only return the amount of this packet since
383 * that is all that will be read.
384 */
385 amount = skb->len;
386 }
387 rc = put_user(amount, (int __user *)arg);
388 }
389 break;
390 default:
391 rc = -ENOIOCTLCMD;
392 break;
393 }
394 out:
395 release_sock(sk);
396 return rc;
397 }
398
399 EXPORT_SYMBOL_GPL(dccp_ioctl);
400
/**
 * dccp_setsockopt_service  -  Install service code and optional service list
 * @sk:      socket to configure
 * @service: primary service code (first 32-bit word of the option value)
 * @optval:  option value supplied by the caller
 * @optlen:  length of @optval in bytes
 *
 * Anything in @optval beyond the first word is copied into a newly
 * allocated service list, which replaces (and frees) the previous one.
 *
 * Returns 0 on success, -EINVAL for an invalid service code or an
 * over-long option, -ENOMEM on allocation failure, and -EFAULT when the
 * list cannot be copied or contains DCCP_SERVICE_INVALID_VALUE.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   sockptr_t optval, unsigned int optlen)
{
	struct dccp_service_list *new_list = NULL;
	struct dccp_sock *dp = dccp_sk(sk);

	if (service == DCCP_SERVICE_INVALID_VALUE)
		return -EINVAL;
	if (optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		const size_t list_bytes = optlen - sizeof(service);

		new_list = kmalloc(optlen, GFP_KERNEL);
		if (new_list == NULL)
			return -ENOMEM;

		/* The first word is @service itself, not a list entry. */
		new_list->dccpsl_nr = optlen / sizeof(u32) - 1;

		if (copy_from_sockptr_offset(new_list->dccpsl_list, optval,
					     sizeof(service), list_bytes) ||
		    dccp_list_has_service(new_list,
					  DCCP_SERVICE_INVALID_VALUE)) {
			kfree(new_list);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = new_list;
	release_sock(sk);

	return 0;
}
434
/**
 * dccp_setsockopt_cscov  -  Register minimum checksum coverage preference
 * @sk:    socket to configure
 * @cscov: requested minimum checksum coverage, valid range 0..15
 * @rx:    true to set the receive-side value, false for the send side
 *
 * A value of 0 needs no negotiation and is accepted without further action.
 * For 1..15 the list cscov..15 is registered for negotiation and, on
 * success, @cscov is recorded in dccps_pcrlen (@rx) or dccps_pcslen.
 *
 * Returns 0 on success, -EINVAL if @cscov is out of range, -ENOBUFS if the
 * temporary list cannot be allocated, otherwise the error returned by
 * dccp_feat_register_sp().
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	/*
	 * Fill the list without modifying @cscov: the requested value is
	 * still needed below. (Post-incrementing @cscov here would leave it
	 * at 16, outside the range validated above.)
	 */
	for (i = 0; i < len; i++)
		list[i] = cscov + i;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
470
/**
 * dccp_setsockopt_ccid  -  Register a CCID preference list
 * @sk:     socket to configure
 * @type:   DCCP_SOCKOPT_TX_CCID, DCCP_SOCKOPT_RX_CCID or DCCP_SOCKOPT_CCID
 *          (the last one applies the list to both directions)
 * @optval: list of CCID values, one byte each
 * @optlen: number of bytes in @optval (1..DCCP_FEAT_MAX_SP_VALS)
 *
 * Returns 0 on success, -EINVAL for a bad length, the memdup_sockptr()
 * error if the list cannot be copied, or the first error reported by
 * dccp_feat_register_sp().
 */
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				sockptr_t optval, unsigned int optlen)
{
	const bool want_tx = type == DCCP_SOCKOPT_TX_CCID ||
			     type == DCCP_SOCKOPT_CCID;
	const bool want_rx = type == DCCP_SOCKOPT_RX_CCID ||
			     type == DCCP_SOCKOPT_CCID;
	u8 *ccids;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	ccids = memdup_sockptr(optval, optlen);
	if (IS_ERR(ccids))
		return PTR_ERR(ccids);

	lock_sock(sk);
	if (want_tx)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, ccids, optlen);

	/* Only go on to the RX side if the TX side (if any) succeeded. */
	if (rc == 0 && want_rx)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, ccids, optlen);
	release_sock(sk);

	kfree(ccids);
	return rc;
}
495
/**
 * do_dccp_setsockopt  -  Dispatch a SOL_DCCP setsockopt request
 * @sk:      socket to configure
 * @level:   option level (not examined here)
 * @optname: DCCP_SOCKOPT_* option to set
 * @optval:  option value supplied by the caller
 * @optlen:  length of @optval in bytes
 *
 * Deprecated options are acknowledged with a warning and return 0. The
 * CCID options take a byte list; every other option requires at least an
 * int in @optval.
 *
 * Returns 0 on success or a negative errno.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      sockptr_t optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int ival;
	int rc = 0;

	/* Options that do not carry an integer argument. */
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (copy_from_sockptr(&ival, optval, sizeof(int)))
		return -EFAULT;

	/* The service option takes the socket lock itself. */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, ival, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role == DCCP_ROLE_SERVER)
			dp->dccps_server_timewait = (ival != 0);
		else
			rc = -EOPNOTSUPP;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		rc = dccp_setsockopt_cscov(sk, ival, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		rc = dccp_setsockopt_cscov(sk, ival, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		/* The queueing policy may only be chosen while closed. */
		if (sk->sk_state != DCCP_CLOSED)
			rc = -EISCONN;
		else if (ival < 0 || ival >= DCCPQ_POLICY_MAX)
			rc = -EINVAL;
		else
			dp->dccps_qpolicy = ival;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (ival >= 0)
			dp->dccps_tx_qlen = ival;
		else
			rc = -EINVAL;
		break;
	default:
		rc = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return rc;
}
561
/**
 * dccp_setsockopt  -  setsockopt entry point for DCCP sockets
 *
 * SOL_DCCP options are handled by do_dccp_setsockopt(); every other level
 * is passed on to the address-family specific setsockopt handler.
 */
int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		    unsigned int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, optname,
						     optval, optlen);
}
571
572 EXPORT_SYMBOL_GPL(dccp_setsockopt);
573
/**
 * dccp_getsockopt_service  -  Copy service code and service list to the user
 * @sk:     socket to query
 * @len:    size of the user buffer in bytes
 * @optval: user buffer; receives the service code followed by the list
 * @optlen: user int; receives the number of bytes written
 *
 * Returns 0 on success, -EINVAL if the buffer is too small for the service
 * code plus the list, and -EFAULT if writing to user space fails.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int list_bytes = 0;
	int needed = sizeof(u32);
	int rc;

	lock_sock(sk);

	sl = dp->dccps_service_list;
	if (sl != NULL) {
		list_bytes = sl->dccpsl_nr * sizeof(u32);
		needed += list_bytes;
	}

	if (needed > len)
		rc = -EINVAL;
	else if (put_user(needed, optlen) ||
		 put_user(dp->dccps_service, optval) ||
		 (sl != NULL &&
		  copy_to_user(optval + 1, sl->dccpsl_list, list_bytes)))
		rc = -EFAULT;
	else
		rc = 0;

	release_sock(sk);
	return rc;
}
601
/**
 * do_dccp_getsockopt  -  Dispatch a SOL_DCCP getsockopt request
 * @sk:      socket to query
 * @level:   option level (not examined here)
 * @optname: DCCP_SOCKOPT_* option, or a CCID-specific option in 128..255
 * @optval:  user buffer for the result
 * @optlen:  user int holding the buffer size on entry; updated on return
 *
 * The user buffer must hold at least an int. Option names 128..191 are
 * passed to the RX CCID and 192..255 to the TX CCID.
 *
 * Returns 0 on success or a negative errno.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int result, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	/* Options that write their own reply (or none at all). */
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval,
					       optlen);
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);

	/* Options whose reply is a single int, written out below. */
	case DCCP_SOCKOPT_GET_CUR_MPS:
		result = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_TX_CCID:
		result = ccid_get_current_tx_ccid(dp);
		if (result < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		result = ccid_get_current_rx_ccid(dp);
		if (result < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		result = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		result = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		result = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		result = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		result = dp->dccps_tx_qlen;
		break;
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(result);
	if (put_user(len, optlen) || copy_to_user(optval, &result, len))
		return -EFAULT;

	return 0;
}
669
/**
 * dccp_getsockopt  -  getsockopt entry point for DCCP sockets
 *
 * SOL_DCCP options are handled by do_dccp_getsockopt(); every other level
 * is passed on to the address-family specific getsockopt handler.
 */
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, optname,
						     optval, optlen);
}
679
680 EXPORT_SYMBOL_GPL(dccp_getsockopt);
681
dccp_msghdr_parse(struct msghdr * msg,struct sk_buff * skb)682 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
683 {
684 struct cmsghdr *cmsg;
685
686 /*
687 * Assign an (opaque) qpolicy priority value to skb->priority.
688 *
689 * We are overloading this skb field for use with the qpolicy subystem.
690 * The skb->priority is normally used for the SO_PRIORITY option, which
691 * is initialised from sk_priority. Since the assignment of sk_priority
692 * to skb->priority happens later (on layer 3), we overload this field
693 * for use with queueing priorities as long as the skb is on layer 4.
694 * The default priority value (if nothing is set) is 0.
695 */
696 skb->priority = 0;
697
698 for_each_cmsghdr(cmsg, msg) {
699 if (!CMSG_OK(msg, cmsg))
700 return -EINVAL;
701
702 if (cmsg->cmsg_level != SOL_DCCP)
703 continue;
704
705 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
706 !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
707 return -EINVAL;
708
709 switch (cmsg->cmsg_type) {
710 case DCCP_SCM_PRIORITY:
711 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
712 return -EINVAL;
713 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
714 break;
715 default:
716 return -EINVAL;
717 }
718 }
719 return 0;
720 }
721
/**
 * dccp_sendmsg  -  Queue one datagram for transmission
 * @sk:  socket to send on
 * @msg: message header with payload and optional control messages
 * @len: payload length in bytes
 *
 * Waits for the connection to be established if necessary, copies the
 * payload into a fresh skb, hands it to the queueing policy and kicks
 * the transmit path unless the xmit timer is already pending.
 *
 * Returns @len on success, otherwise a negative errno: -EMSGSIZE if @len
 * exceeds the cached MSS, -EAGAIN if the TX queue is full, -ENOTCONN if
 * the socket is closed, or an error from waiting, allocation, copying or
 * cmsg parsing.
 */
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	/*
	 * Cheap early rejection without the socket lock. The value may
	 * change concurrently, so it is checked again once locked.
	 */
	if (len > READ_ONCE(dp->dccps_mss_cache))
		return -EMSGSIZE;

	lock_sock(sk);

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* The allocation may sleep, so drop the socket lock around it. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_discard;
	}

	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	/*
	 * Authoritative size check: the first test ran unlocked and the
	 * lock was released again during allocation.
	 */
	if (len > dp->dccps_mss_cache) {
		rc = -EMSGSIZE;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits to release further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
790
791 EXPORT_SYMBOL_GPL(dccp_sendmsg);
792
/**
 * dccp_recvmsg  -  Receive one datagram
 * @sk:       socket to read from
 * @msg:      destination message header
 * @len:      size of the caller's buffer
 * @flags:    MSG_* flags (MSG_DONTWAIT, MSG_PEEK and MSG_TRUNC are examined)
 * @addr_len: not used by this function
 *
 * Looks at the head of the receive queue: Data/DataAck packets are copied
 * to @msg, Close/CloseReq/Reset packets end the read with 0, and any other
 * packet type is dropped. With an empty queue the socket status decides
 * whether to return or to wait for data.
 *
 * Returns the number of bytes copied (or the full packet length when
 * MSG_TRUNC is set in @flags), 0 at end of connection, or a negative errno.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
		 int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	for (;;) {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Peeking must not advance the close handshake. */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			fallthrough;
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			/*
			 * Without SOCK_DONE this is a read from a socket
			 * that was never connected.
			 */
			len = sock_flag(sk, SOCK_DONE) ? 0 : -ENOTCONN;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo, NULL);
		continue;
found_ok_skb:
		/* Clamp to the packet size, or flag that the packet was cut. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	}
out:
	release_sock(sk);
	return len;
}
898
899 EXPORT_SYMBOL_GPL(dccp_recvmsg);
900
inet_dccp_listen(struct socket * sock,int backlog)901 int inet_dccp_listen(struct socket *sock, int backlog)
902 {
903 struct sock *sk = sock->sk;
904 unsigned char old_state;
905 int err;
906
907 lock_sock(sk);
908
909 err = -EINVAL;
910 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
911 goto out;
912
913 old_state = sk->sk_state;
914 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
915 goto out;
916
917 WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
918 /* Really, if the socket is already in listen state
919 * we can only allow the backlog to be adjusted.
920 */
921 if (old_state != DCCP_LISTEN) {
922 struct dccp_sock *dp = dccp_sk(sk);
923
924 dp->dccps_role = DCCP_ROLE_LISTEN;
925
926 /* do not start to listen if feature negotiation setup fails */
927 if (dccp_feat_finalise_settings(dp)) {
928 err = -EPROTO;
929 goto out;
930 }
931
932 err = inet_csk_listen_start(sk);
933 if (err)
934 goto out;
935 }
936 err = 0;
937
938 out:
939 release_sock(sk);
940 return err;
941 }
942
943 EXPORT_SYMBOL_GPL(inet_dccp_listen);
944
dccp_terminate_connection(struct sock * sk)945 static void dccp_terminate_connection(struct sock *sk)
946 {
947 u8 next_state = DCCP_CLOSED;
948
949 switch (sk->sk_state) {
950 case DCCP_PASSIVE_CLOSE:
951 case DCCP_PASSIVE_CLOSEREQ:
952 dccp_finish_passive_close(sk);
953 break;
954 case DCCP_PARTOPEN:
955 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
956 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
957 fallthrough;
958 case DCCP_OPEN:
959 dccp_send_close(sk, 1);
960
961 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
962 !dccp_sk(sk)->dccps_server_timewait)
963 next_state = DCCP_ACTIVE_CLOSEREQ;
964 else
965 next_state = DCCP_CLOSING;
966 fallthrough;
967 default:
968 dccp_set_state(sk, next_state);
969 }
970 }
971
dccp_close(struct sock * sk,long timeout)972 void dccp_close(struct sock *sk, long timeout)
973 {
974 struct dccp_sock *dp = dccp_sk(sk);
975 struct sk_buff *skb;
976 u32 data_was_unread = 0;
977 int state;
978
979 lock_sock(sk);
980
981 sk->sk_shutdown = SHUTDOWN_MASK;
982
983 if (sk->sk_state == DCCP_LISTEN) {
984 dccp_set_state(sk, DCCP_CLOSED);
985
986 /* Special case. */
987 inet_csk_listen_stop(sk);
988
989 goto adjudge_to_death;
990 }
991
992 sk_stop_timer(sk, &dp->dccps_xmit_timer);
993
994 /*
995 * We need to flush the recv. buffs. We do this only on the
996 * descriptor close, not protocol-sourced closes, because the
997 *reader process may not have drained the data yet!
998 */
999 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1000 data_was_unread += skb->len;
1001 __kfree_skb(skb);
1002 }
1003
1004 /* If socket has been already reset kill it. */
1005 if (sk->sk_state == DCCP_CLOSED)
1006 goto adjudge_to_death;
1007
1008 if (data_was_unread) {
1009 /* Unread data was tossed, send an appropriate Reset Code */
1010 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1011 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1012 dccp_set_state(sk, DCCP_CLOSED);
1013 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1014 /* Check zero linger _after_ checking for unread data. */
1015 sk->sk_prot->disconnect(sk, 0);
1016 } else if (sk->sk_state != DCCP_CLOSED) {
1017 /*
1018 * Normal connection termination. May need to wait if there are
1019 * still packets in the TX queue that are delayed by the CCID.
1020 */
1021 dccp_flush_write_queue(sk, &timeout);
1022 dccp_terminate_connection(sk);
1023 }
1024
1025 /*
1026 * Flush write queue. This may be necessary in several cases:
1027 * - we have been closed by the peer but still have application data;
1028 * - abortive termination (unread data or zero linger time),
1029 * - normal termination but queue could not be flushed within time limit
1030 */
1031 __skb_queue_purge(&sk->sk_write_queue);
1032
1033 sk_stream_wait_close(sk, timeout);
1034
1035 adjudge_to_death:
1036 state = sk->sk_state;
1037 sock_hold(sk);
1038 sock_orphan(sk);
1039
1040 /*
1041 * It is the last release_sock in its life. It will remove backlog.
1042 */
1043 release_sock(sk);
1044 /*
1045 * Now socket is owned by kernel and we acquire BH lock
1046 * to finish close. No need to check for user refs.
1047 */
1048 local_bh_disable();
1049 bh_lock_sock(sk);
1050 WARN_ON(sock_owned_by_user(sk));
1051
1052 this_cpu_inc(dccp_orphan_count);
1053
1054 /* Have we already been destroyed by a softirq or backlog? */
1055 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1056 goto out;
1057
1058 if (sk->sk_state == DCCP_CLOSED)
1059 inet_csk_destroy_sock(sk);
1060
1061 /* Otherwise, socket is reprieved until protocol close. */
1062
1063 out:
1064 bh_unlock_sock(sk);
1065 local_bh_enable();
1066 sock_put(sk);
1067 }
1068
1069 EXPORT_SYMBOL_GPL(dccp_close);
1070
/**
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 * @sk:  socket shutdown() was called on
 * @how: shutdown mode requested by the caller
 *
 * Only emits a debug message; no socket state is changed here.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1075
1076 EXPORT_SYMBOL_GPL(dccp_shutdown);
1077
dccp_mib_init(void)1078 static inline int __init dccp_mib_init(void)
1079 {
1080 dccp_statistics = alloc_percpu(struct dccp_mib);
1081 if (!dccp_statistics)
1082 return -ENOMEM;
1083 return 0;
1084 }
1085
dccp_mib_exit(void)1086 static inline void dccp_mib_exit(void)
1087 {
1088 free_percpu(dccp_statistics);
1089 }
1090
/*
 * Read-only module parameter. When non-zero, dccp_init() derives the size
 * of the established hash table from it instead of from total RAM.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime switch for debug messages; writable through the module parameter. */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1102
dccp_init(void)1103 static int __init dccp_init(void)
1104 {
1105 unsigned long goal;
1106 unsigned long nr_pages = totalram_pages();
1107 int ehash_order, bhash_order, i;
1108 int rc;
1109
1110 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1111 sizeof_field(struct sk_buff, cb));
1112 rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
1113 if (rc)
1114 goto out_fail;
1115 rc = -ENOBUFS;
1116 dccp_hashinfo.bind_bucket_cachep =
1117 kmem_cache_create("dccp_bind_bucket",
1118 sizeof(struct inet_bind_bucket), 0,
1119 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1120 if (!dccp_hashinfo.bind_bucket_cachep)
1121 goto out_free_hashinfo2;
1122 dccp_hashinfo.bind2_bucket_cachep =
1123 kmem_cache_create("dccp_bind2_bucket",
1124 sizeof(struct inet_bind2_bucket), 0,
1125 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1126 if (!dccp_hashinfo.bind2_bucket_cachep)
1127 goto out_free_bind_bucket_cachep;
1128
1129 /*
1130 * Size and allocate the main established and bind bucket
1131 * hash tables.
1132 *
1133 * The methodology is similar to that of the buffer cache.
1134 */
1135 if (nr_pages >= (128 * 1024))
1136 goal = nr_pages >> (21 - PAGE_SHIFT);
1137 else
1138 goal = nr_pages >> (23 - PAGE_SHIFT);
1139
1140 if (thash_entries)
1141 goal = (thash_entries *
1142 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1143 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1144 ;
1145 do {
1146 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1147 sizeof(struct inet_ehash_bucket);
1148
1149 while (hash_size & (hash_size - 1))
1150 hash_size--;
1151 dccp_hashinfo.ehash_mask = hash_size - 1;
1152 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1153 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1154 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1155
1156 if (!dccp_hashinfo.ehash) {
1157 DCCP_CRIT("Failed to allocate DCCP established hash table");
1158 goto out_free_bind2_bucket_cachep;
1159 }
1160
1161 for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1162 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1163
1164 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1165 goto out_free_dccp_ehash;
1166
1167 bhash_order = ehash_order;
1168
1169 do {
1170 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1171 sizeof(struct inet_bind_hashbucket);
1172 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1173 bhash_order > 0)
1174 continue;
1175 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1176 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1177 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1178
1179 if (!dccp_hashinfo.bhash) {
1180 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1181 goto out_free_dccp_locks;
1182 }
1183
1184 dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
1185 __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
1186
1187 if (!dccp_hashinfo.bhash2) {
1188 DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
1189 goto out_free_dccp_bhash;
1190 }
1191
1192 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1193 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1194 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1195 spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
1196 INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
1197 }
1198
1199 dccp_hashinfo.pernet = false;
1200
1201 rc = dccp_mib_init();
1202 if (rc)
1203 goto out_free_dccp_bhash2;
1204
1205 rc = dccp_ackvec_init();
1206 if (rc)
1207 goto out_free_dccp_mib;
1208
1209 rc = dccp_sysctl_init();
1210 if (rc)
1211 goto out_ackvec_exit;
1212
1213 rc = ccid_initialize_builtins();
1214 if (rc)
1215 goto out_sysctl_exit;
1216
1217 dccp_timestamping_init();
1218
1219 return 0;
1220
1221 out_sysctl_exit:
1222 dccp_sysctl_exit();
1223 out_ackvec_exit:
1224 dccp_ackvec_exit();
1225 out_free_dccp_mib:
1226 dccp_mib_exit();
1227 out_free_dccp_bhash2:
1228 free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1229 out_free_dccp_bhash:
1230 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1231 out_free_dccp_locks:
1232 inet_ehash_locks_free(&dccp_hashinfo);
1233 out_free_dccp_ehash:
1234 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1235 out_free_bind2_bucket_cachep:
1236 kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
1237 out_free_bind_bucket_cachep:
1238 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1239 out_free_hashinfo2:
1240 inet_hashinfo2_free_mod(&dccp_hashinfo);
1241 out_fail:
1242 dccp_hashinfo.bhash = NULL;
1243 dccp_hashinfo.bhash2 = NULL;
1244 dccp_hashinfo.ehash = NULL;
1245 dccp_hashinfo.bind_bucket_cachep = NULL;
1246 dccp_hashinfo.bind2_bucket_cachep = NULL;
1247 return rc;
1248 }
1249
dccp_fini(void)1250 static void __exit dccp_fini(void)
1251 {
1252 int bhash_order = get_order(dccp_hashinfo.bhash_size *
1253 sizeof(struct inet_bind_hashbucket));
1254
1255 ccid_cleanup_builtins();
1256 dccp_mib_exit();
1257 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1258 free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1259 free_pages((unsigned long)dccp_hashinfo.ehash,
1260 get_order((dccp_hashinfo.ehash_mask + 1) *
1261 sizeof(struct inet_ehash_bucket)));
1262 inet_ehash_locks_free(&dccp_hashinfo);
1263 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1264 dccp_ackvec_exit();
1265 dccp_sysctl_exit();
1266 inet_hashinfo2_free_mod(&dccp_hashinfo);
1267 }
1268
1269 module_init(dccp_init);
1270 module_exit(dccp_fini);
1271
1272 MODULE_LICENSE("GPL");
1273 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1274 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1275