1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Definitions for the AF_INET socket handler.
7 *
8 * Version: @(#)sock.h 1.0.4 05/13/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Corey Minyard <wf-rch!minyard@relay.EU.net>
13 * Florian La Roche <flla@stud.uni-sb.de>
14 *
15 * Fixes:
16 * Alan Cox : Volatiles in skbuff pointers. See
17 * skbuff comments. May be overdone,
18 * better to prove they can be removed
19 * than the reverse.
20 * Alan Cox : Added a zapped field for tcp to note
21 * a socket is reset and must stay shut up
22 * Alan Cox : New fields for options
23 * Pauline Middelink : identd support
24 * Alan Cox : Eliminate low level recv/recvfrom
25 * David S. Miller : New socket lookup architecture.
26 * Steve Whitehouse: Default routines for sock_ops
27 *
28 * This program is free software; you can redistribute it and/or
29 * modify it under the terms of the GNU General Public License
30 * as published by the Free Software Foundation; either version
31 * 2 of the License, or (at your option) any later version.
32 */
33 #ifndef _SOCK_H
34 #define _SOCK_H
35
36 #include <linux/config.h>
37 #include <linux/timer.h>
38 #include <linux/cache.h>
39 #include <linux/in.h> /* struct sockaddr_in */
40
41 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
42 #include <linux/in6.h> /* struct sockaddr_in6 */
43 #include <linux/ipv6.h> /* dest_cache, inet6_options */
44 #include <linux/icmpv6.h>
45 #include <net/if_inet6.h> /* struct ipv6_mc_socklist */
46 #endif
47
48 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
49 #include <linux/icmp.h>
50 #endif
51 #include <linux/tcp.h> /* struct tcphdr */
52 #if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
53 #include <net/sctp/structs.h> /* struct sctp_opt */
54 #endif
55
56 #include <linux/netdevice.h>
57 #include <linux/skbuff.h> /* struct sk_buff */
58 #include <net/protocol.h> /* struct inet_protocol */
59 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
60 #include <net/x25.h>
61 #endif
62 #if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
63 #include <linux/if_wanpipe.h>
64 #endif
65
66 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
67 #include <net/ax25.h>
68 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
69 #include <net/netrom.h>
70 #endif
71 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
72 #include <net/rose.h>
73 #endif
74 #endif
75
76 #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
77 #include <linux/if_pppox.h>
78 #include <linux/ppp_channel.h> /* struct ppp_channel */
79 #endif
80
81 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
82 #if defined(CONFIG_SPX) || defined(CONFIG_SPX_MODULE)
83 #include <net/spx.h>
84 #else
85 #include <net/ipx.h>
86 #endif /* CONFIG_SPX */
87 #endif /* CONFIG_IPX */
88
89 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
90 #include <linux/atalk.h>
91 #endif
92
93 #if defined(CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
94 #include <net/dn.h>
95 #endif
96
97 #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
98 #include <net/irda/irda.h>
99 #endif
100
101 #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
102 struct atm_vcc;
103 #endif
104
105 #ifdef CONFIG_FILTER
106 #include <linux/filter.h>
107 #endif
108
109 #include <asm/atomic.h>
110 #include <net/dst.h>
111
112
113 /* The AF_UNIX specific socket options */
114 struct unix_opt {
115 struct unix_address *addr;
116 struct dentry * dentry;
117 struct vfsmount * mnt;
118 struct semaphore readsem;
119 struct sock * other;
120 struct sock ** list;
121 struct sock * gc_tree;
122 atomic_t inflight;
123 rwlock_t lock;
124 wait_queue_head_t peer_wait;
125 };
126
127
128 /* Once the IPX ncpd patches are in these are going into protinfo. */
129 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
130 struct ipx_opt {
131 ipx_address dest_addr;
132 ipx_interface *intrfc;
133 unsigned short port;
134 #ifdef CONFIG_IPX_INTERN
135 unsigned char node[IPX_NODE_LEN];
136 #endif
137 unsigned short type;
138 /*
139 * To handle special ncp connection-handling sockets for mars_nwe,
140 * the connection number must be stored in the socket.
141 */
142 unsigned short ipx_ncp_conn;
143 };
144 #endif
145
146 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
147 struct ipv6_pinfo {
148 struct in6_addr saddr;
149 struct in6_addr rcv_saddr;
150 struct in6_addr daddr;
151 struct in6_addr *daddr_cache;
152
153 __u32 flow_label;
154 __u32 frag_size;
155 int hop_limit;
156 int mcast_hops;
157 int mcast_oif;
158
159 /* pktoption flags */
160 union {
161 struct {
162 __u8 srcrt:2,
163 rxinfo:1,
164 rxhlim:1,
165 hopopts:1,
166 dstopts:1,
167 authhdr:1,
168 rxflow:1;
169 } bits;
170 __u8 all;
171 } rxopt;
172
173 /* sockopt flags */
174 __u8 mc_loop:1,
175 recverr:1,
176 sndflow:1,
177 pmtudisc:2,
178 ipv6only:1;
179
180 struct ipv6_mc_socklist *ipv6_mc_list;
181 struct ipv6_ac_socklist *ipv6_ac_list;
182 struct ipv6_fl_socklist *ipv6_fl_list;
183 __u32 dst_cookie;
184
185 struct ipv6_txoptions *opt;
186 struct sk_buff *pktoptions;
187 };
188
189 struct raw6_opt {
190 __u32 checksum; /* perform checksum */
191 __u32 offset; /* checksum offset */
192
193 struct icmp6_filter filter;
194 };
195
196 #define __ipv6_only_sock(sk) ((sk)->net_pinfo.af_inet6.ipv6only)
197 #define ipv6_only_sock(sk) ((sk)->family == PF_INET6 && \
198 (sk)->net_pinfo.af_inet6.ipv6only)
199 #else
200 #define __ipv6_only_sock(sk) 0
201 #define ipv6_only_sock(sk) 0
202 #endif /* IPV6 */
203
204 #if defined(CONFIG_INET) || defined(CONFIG_INET_MODULE)
205 struct raw_opt {
206 struct icmp_filter filter;
207 };
208 #endif
209
210 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
211 struct inet_opt
212 {
213 int ttl; /* TTL setting */
214 int tos; /* TOS */
215 unsigned cmsg_flags;
216 struct ip_options *opt;
217 unsigned char hdrincl; /* Include headers ? */
218 __u8 mc_ttl; /* Multicasting TTL */
219 __u8 mc_loop; /* Loopback */
220 unsigned recverr : 1,
221 freebind : 1;
222 __u16 id; /* ID counter for DF pkts */
223 __u8 pmtudisc;
224 int mc_index; /* Multicast device index */
225 __u32 mc_addr;
226 struct ip_mc_socklist *mc_list; /* Group array */
227 };
228 #endif
229
230 #if defined(CONFIG_PPPOE) || defined (CONFIG_PPPOE_MODULE)
231 struct pppoe_opt
232 {
233 struct net_device *dev; /* device associated with socket*/
234 struct pppoe_addr pa; /* what this socket is bound to*/
235 struct sockaddr_pppox relay; /* what socket data will be
236 relayed to (PPPoE relaying) */
237 };
238
239 struct pppox_opt
240 {
241 struct ppp_channel chan;
242 struct sock *sk;
243 struct pppox_opt *next; /* for hash table */
244 union {
245 struct pppoe_opt pppoe;
246 } proto;
247 };
248 #define pppoe_dev proto.pppoe.dev
249 #define pppoe_pa proto.pppoe.pa
250 #define pppoe_relay proto.pppoe.relay
251 #endif
252
253 /* This defines a selective acknowledgement block. */
254 struct tcp_sack_block {
255 __u32 start_seq;
256 __u32 end_seq;
257 };
258
259 enum tcp_congestion_algo {
260 TCP_RENO=0,
261 TCP_VEGAS,
262 TCP_WESTWOOD,
263 TCP_BIC,
264 };
265
266 struct tcp_opt {
267 int tcp_header_len; /* Bytes of tcp header to send */
268
269 /*
270 * Header prediction flags
271 * 0x5?10 << 16 + snd_wnd in net byte order
272 */
273 __u32 pred_flags;
274
275 /*
276 * RFC793 variables by their proper names. This means you can
277 * read the code and the spec side by side (and laugh ...)
278 * See RFC793 and RFC1122. The RFC writes these in capitals.
279 */
280 __u32 rcv_nxt; /* What we want to receive next */
281 __u32 snd_nxt; /* Next sequence we send */
282
283 __u32 snd_una; /* First byte we want an ack for */
284 __u32 snd_sml; /* Last byte of the most recently transmitted small packet */
285 __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
286 __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
287
288 /* Delayed ACK control data */
289 struct {
290 __u8 pending; /* ACK is pending */
291 __u8 quick; /* Scheduled number of quick acks */
292 __u8 pingpong; /* The session is interactive */
293 __u8 blocked; /* Delayed ACK was blocked by socket lock*/
294 __u32 ato; /* Predicted tick of soft clock */
295 unsigned long timeout; /* Currently scheduled timeout */
296 __u32 lrcvtime; /* timestamp of last received data packet*/
297 __u16 last_seg_size; /* Size of last incoming segment */
298 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
299 } ack;
300
301 /* Data for direct copy to user */
302 struct {
303 struct sk_buff_head prequeue;
304 struct task_struct *task;
305 struct iovec *iov;
306 int memory;
307 int len;
308 } ucopy;
309
310 __u32 snd_wl1; /* Sequence for window update */
311 __u32 snd_wnd; /* The window we expect to receive */
312 __u32 max_window; /* Maximal window ever seen from peer */
313 __u32 pmtu_cookie; /* Last pmtu seen by socket */
314 __u16 mss_cache; /* Cached effective mss, not including SACKS */
315 __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
316 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
317 __u8 ca_state; /* State of fast-retransmit machine */
318 __u8 retransmits; /* Number of unrecovered RTO timeouts. */
319
320 __u8 reordering; /* Packet reordering metric. */
321 __u8 queue_shrunk; /* Write queue has been shrunk recently.*/
322 __u8 defer_accept; /* User waits for some data after accept() */
323
324 /* RTT measurement */
325 __u8 backoff; /* backoff */
326 __u32 srtt; /* smoothed round trip time << 3 */
327 __u32 mdev; /* mean deviation */
328 __u32 mdev_max; /* maximal mdev for the last rtt period */
329 __u32 rttvar; /* smoothed mdev_max */
330 __u32 rtt_seq; /* sequence number to update rttvar */
331 __u32 rto; /* retransmit timeout */
332
333 __u32 packets_out; /* Packets which are "in flight" */
334 __u32 left_out; /* Packets which have left the network */
335 __u32 retrans_out; /* Retransmitted packets out */
336
337
338 /*
339 * Slow start and congestion control (see also Nagle, and Karn & Partridge)
340 */
341 __u32 snd_ssthresh; /* Slow start size threshold */
342 __u32 snd_cwnd; /* Sending congestion window */
343 __u16 snd_cwnd_cnt; /* Linear increase counter */
344 __u16 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
345 __u32 snd_cwnd_used;
346 __u32 snd_cwnd_stamp;
347
348 /* Two commonly used timers in both sender and receiver paths. */
349 unsigned long timeout;
350 struct timer_list retransmit_timer; /* Resend (no ack) */
351 struct timer_list delack_timer; /* Ack delay */
352
353 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */
354
355 struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */
356 struct sk_buff *send_head; /* Front of stuff to transmit */
357 struct page *sndmsg_page; /* Cached page for sendmsg */
358 u32 sndmsg_off; /* Cached offset for sendmsg */
359
360 __u32 rcv_wnd; /* Current receiver window */
361 __u32 rcv_wup; /* rcv_nxt on last window update sent */
362 __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
363 __u32 pushed_seq; /* Last pushed seq, required to talk to Windows */
364 __u32 copied_seq; /* Head of yet unread data */
365 /*
366 * Options received (usually on last packet, some only on SYN packets).
367 */
368 char tstamp_ok, /* TIMESTAMP seen on SYN packet */
369 wscale_ok, /* Wscale seen on SYN packet */
370 sack_ok; /* SACK seen on SYN packet */
371 char saw_tstamp; /* Saw TIMESTAMP on last packet */
372 __u8 snd_wscale; /* Window scaling received from sender */
373 __u8 rcv_wscale; /* Window scaling to send to receiver */
374 __u8 nonagle; /* Disable Nagle algorithm? */
375 __u8 keepalive_probes; /* num of allowed keep alive probes */
376
377 /* PAWS/RTTM data */
378 __u32 rcv_tsval; /* Time stamp value */
379 __u32 rcv_tsecr; /* Time stamp echo reply */
380 __u32 ts_recent; /* Time stamp to echo next */
381 long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
382
383 /* SACKs data */
384 __u16 user_mss; /* mss requested by user in ioctl */
385 __u8 dsack; /* D-SACK is scheduled */
386 __u8 eff_sacks; /* Size of SACK array to send with next packet */
387 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
388 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
389
390 __u32 window_clamp; /* Maximal window to advertise */
391 __u32 rcv_ssthresh; /* Current window clamp */
392 __u8 probes_out; /* unanswered 0 window probes */
393 __u8 num_sacks; /* Number of SACK blocks */
394 __u16 advmss; /* Advertised MSS */
395
396 __u8 syn_retries; /* num of allowed syn retries */
397 __u8 ecn_flags; /* ECN status bits. */
398 __u16 prior_ssthresh; /* ssthresh saved at recovery start */
399 __u32 lost_out; /* Lost packets */
400 __u32 sacked_out; /* SACK'd packets */
401 __u32 fackets_out; /* FACK'd packets */
402 __u32 high_seq; /* snd_nxt at onset of congestion */
403
404 __u32 retrans_stamp; /* Timestamp of the last retransmit,
405 * also used in SYN-SENT to remember stamp of
406 * the first SYN. */
407 __u32 undo_marker; /* tracking retrans started here. */
408 int undo_retrans; /* number of undoable retransmissions. */
409 __u32 urg_seq; /* Seq of received urgent pointer */
410 __u16 urg_data; /* Saved octet of OOB data and control flags */
411 __u8 pending; /* Scheduled timer event */
412 __u8 urg_mode; /* In urgent mode */
413 __u32 snd_up; /* Urgent pointer */
414
415 /* The syn_wait_lock is necessary only to avoid tcp_get_info having
416 * to grab the main sock lock while browsing the listening hash
417 * (otherwise it's deadlock prone).
418 * This lock is acquired in read mode only from tcp_get_info() and
419 * it's acquired in write mode _only_ from code that is actively
420 * changing the syn_wait_queue. All readers that are holding
421 * the master sock lock don't need to grab this lock in read mode
422 * too, as the syn_wait_queue writes are always protected by
423 * the main sock lock.
424 */
425 rwlock_t syn_wait_lock;
426 struct tcp_listen_opt *listen_opt;
427
428 /* FIFO of established children */
429 struct open_request *accept_queue;
430 struct open_request *accept_queue_tail;
431
432 int write_pending; /* A write to socket waits to start. */
433
434 unsigned int keepalive_time; /* time before keep alive takes place */
435 unsigned int keepalive_intvl; /* time interval between keep alive probes */
436 int linger2;
437
438 __u8 adv_cong; /* Using Vegas, Westwood, or BIC */
439 __u8 frto_counter; /* Number of new acks after RTO */
440 __u32 frto_highmark; /* snd_nxt when RTO occurred */
441
442 unsigned long last_synq_overflow;
443
444 /* Receiver side RTT estimation */
445 struct {
446 __u32 rtt;
447 __u32 seq;
448 __u32 time;
449 } rcv_rtt_est;
450
451 /* Receiver queue space */
452 struct {
453 int space;
454 __u32 seq;
455 __u32 time;
456 } rcvq_space;
457
458 /* TCP Westwood structure */
459 struct {
460 __u32 bw_ns_est; /* first bandwidth estimation..not too smoothed 8) */
461 __u32 bw_est; /* bandwidth estimate */
462 __u32 rtt_win_sx; /* here starts a new evaluation... */
463 __u32 bk;
464 __u32 snd_una; /* used for evaluating the number of acked bytes */
465 __u32 cumul_ack;
466 __u32 accounted;
467 __u32 rtt;
468 __u32 rtt_min; /* minimum observed RTT */
469 } westwood;
470
471 /* Vegas variables */
472 struct {
473 __u32 beg_snd_nxt; /* right edge during last RTT */
474 __u32 beg_snd_una; /* left edge during last RTT */
475 __u32 beg_snd_cwnd; /* saves the size of the cwnd */
476 __u8 doing_vegas_now;/* if true, do vegas for this RTT */
477 __u16 cntRTT; /* # of RTTs measured within last RTT */
478 __u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
479 __u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
480 } vegas;
481
482 /* BI TCP Parameters */
483 struct {
484 __u32 cnt; /* increase cwnd by 1 after this number of ACKs */
485 __u32 last_max_cwnd; /* last maximum snd_cwnd */
486 __u32 last_cwnd; /* the last snd_cwnd */
487 __u32 last_stamp; /* time when updated last_cwnd */
488 } bictcp;
489 };
490
491
492 /*
493 * This structure really needs to be cleaned up.
494 * Most of it is for TCP, and not used by any of
495 * the other protocols.
496 */
497
498 /*
499 * The idea is to start moving to a newer struct gradually
500 *
501 * IMHO the newer struct should have the following format:
502 *
503 * struct sock {
504 * sockmem [mem, proto, callbacks]
505 *
506 * union or struct {
507 * ax25;
508 * } ll_pinfo;
509 *
510 * union {
511 * ipv4;
512 * ipv6;
513 * ipx;
514 * netrom;
515 * rose;
516 * x25;
517 * } net_pinfo;
518 *
519 * union {
520 * tcp;
521 * udp;
522 * spx;
523 * netrom;
524 * } tp_pinfo;
525 *
526 * }
527 *
528 * The idea failed because the IPv6 transition assumes dual IP/IPv6 sockets.
529 * So net_pinfo is really IPv6-only, and protinfo unifies all the other
530 * private areas.
531 */
532
533 /* Define this to get the sk->debug debugging facility. */
534 #define SOCK_DEBUGGING
535 #ifdef SOCK_DEBUGGING
536 #define SOCK_DEBUG(sk, msg...) do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG msg); } while (0)
537 #else
538 #define SOCK_DEBUG(sk, msg...) do { } while (0)
539 #endif
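/*
 * Illustrative use (sketch, hypothetical caller): SOCK_DEBUG expands to
 * a printk(KERN_DEBUG ...) only while sk->debug is set, e.g.
 *
 *	SOCK_DEBUG(sk, "%s: rcvbuf=%d\n", __FUNCTION__, sk->rcvbuf);
 */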
540
541 /* This is the per-socket lock. The spinlock provides a synchronization
542 * between user contexts and software interrupt processing, whereas the
543 * mini-semaphore synchronizes multiple users amongst themselves.
544 */
545 typedef struct {
546 spinlock_t slock;
547 unsigned int users;
548 wait_queue_head_t wq;
549 } socket_lock_t;
550
551 #define sock_lock_init(__sk) \
552 do { spin_lock_init(&((__sk)->lock.slock)); \
553 (__sk)->lock.users = 0; \
554 init_waitqueue_head(&((__sk)->lock.wq)); \
555 } while(0)
556
557 struct sock {
558 /* Socket demultiplex comparisons on incoming packets. */
559 __u32 daddr; /* Foreign IPv4 addr */
560 __u32 rcv_saddr; /* Bound local IPv4 addr */
561 __u16 dport; /* Destination port */
562 unsigned short num; /* Local port */
563 int bound_dev_if; /* Bound device index if != 0 */
564
565 /* Main hash linkage for various protocol lookup tables. */
566 struct sock *next;
567 struct sock **pprev;
568 struct sock *bind_next;
569 struct sock **bind_pprev;
570
571 volatile unsigned char state, /* Connection state */
572 zapped; /* In ax25 & ipx means not linked */
573 __u16 sport; /* Source port */
574
575 unsigned short family; /* Address family */
576 unsigned char reuse; /* SO_REUSEADDR setting */
577 unsigned char shutdown;
578 atomic_t refcnt; /* Reference count */
579
580 socket_lock_t lock; /* Synchronizer... */
581 int rcvbuf; /* Size of receive buffer in bytes */
582
583 wait_queue_head_t *sleep; /* Sock wait queue */
584 struct dst_entry *dst_cache; /* Destination cache */
585 rwlock_t dst_lock;
586 atomic_t rmem_alloc; /* Receive queue bytes committed */
587 struct sk_buff_head receive_queue; /* Incoming packets */
588 atomic_t wmem_alloc; /* Transmit queue bytes committed */
589 struct sk_buff_head write_queue; /* Packet sending queue */
590 atomic_t omem_alloc; /* "o" is "option" or "other" */
591 int wmem_queued; /* Persistent queue size */
592 int forward_alloc; /* Space allocated forward. */
593 __u32 saddr; /* Sending source */
594 unsigned int allocation; /* Allocation mode */
595 int sndbuf; /* Size of send buffer in bytes */
596 struct sock *prev;
597
598 /* Not all are volatile, but some are, so we might as well say they all are.
599 * XXX Make this a flag word -DaveM
600 */
601 volatile char dead,
602 done,
603 urginline,
604 keepopen,
605 linger,
606 destroy,
607 no_check,
608 broadcast,
609 bsdism;
610 unsigned char debug;
611 unsigned char rcvtstamp;
612 unsigned char use_write_queue;
613 unsigned char userlocks;
614 /* Hole of 3 bytes. Try to pack. */
615 int route_caps;
616 int proc;
617 unsigned long lingertime;
618
619 int hashent;
620 struct sock *pair;
621
622 /* The backlog queue is special, it is always used with
623 * the per-socket spinlock held and requires low latency
624 * access. Therefore we special case its implementation.
625 */
626 struct {
627 struct sk_buff *head;
628 struct sk_buff *tail;
629 } backlog;
630
631 rwlock_t callback_lock;
632
633 /* Error queue, rarely used. */
634 struct sk_buff_head error_queue;
635
636 struct proto *prot;
637
638 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
639 union {
640 struct ipv6_pinfo af_inet6;
641 } net_pinfo;
642 #endif
643
644 union {
645 struct tcp_opt af_tcp;
646 #if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
647 struct sctp_opt af_sctp;
648 #endif
649 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
650 struct raw_opt tp_raw4;
651 #endif
652 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
653 struct raw6_opt tp_raw;
654 #endif /* CONFIG_IPV6 */
655 #if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
656 struct spx_opt af_spx;
657 #endif /* CONFIG_SPX */
658
659 } tp_pinfo;
660
661 int err, err_soft; /* Soft holds errors that don't
662 cause failure but are the cause
663 of a persistent failure not just
664 'timed out' */
665 unsigned short ack_backlog;
666 unsigned short max_ack_backlog;
667 __u32 priority;
668 unsigned short type;
669 unsigned char localroute; /* Route locally only */
670 unsigned char protocol;
671 struct ucred peercred;
672 int rcvlowat;
673 long rcvtimeo;
674 long sndtimeo;
675
676 #ifdef CONFIG_FILTER
677 /* Socket Filtering Instructions */
678 struct sk_filter *filter;
679 #endif /* CONFIG_FILTER */
680
681 /* This is where all the private (optional) areas that don't
682 * overlap will eventually live.
683 */
684 union {
685 void *destruct_hook;
686 struct unix_opt af_unix;
687 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
688 struct inet_opt af_inet;
689 #endif
690 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
691 struct atalk_sock af_at;
692 #endif
693 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
694 struct ipx_opt af_ipx;
695 #endif
696 #if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
697 struct dn_scp dn;
698 #endif
699 #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
700 struct packet_opt *af_packet;
701 #endif
702 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
703 x25_cb *x25;
704 #endif
705 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
706 ax25_cb *ax25;
707 #endif
708 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
709 nr_cb *nr;
710 #endif
711 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
712 rose_cb *rose;
713 #endif
714 #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
715 struct pppox_opt *pppox;
716 #endif
717 struct netlink_opt *af_netlink;
718 #if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
719 struct econet_opt *af_econet;
720 #endif
721 #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
722 struct atm_vcc *af_atm;
723 #endif
724 #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
725 struct irda_sock *irda;
726 #endif
727 #if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
728 struct wanpipe_opt *af_wanpipe;
729 #endif
730 } protinfo;
731
732
733 /* This part is used for the timeout functions. */
734 struct timer_list timer; /* This is the sock cleanup timer. */
735 struct timeval stamp;
736
737 /* Identd and reporting IO signals */
738 struct socket *socket;
739
740 /* RPC layer private data */
741 void *user_data;
742
743 /* Callbacks */
744 void (*state_change)(struct sock *sk);
745 void (*data_ready)(struct sock *sk,int bytes);
746 void (*write_space)(struct sock *sk);
747 void (*error_report)(struct sock *sk);
748
749 int (*backlog_rcv) (struct sock *sk,
750 struct sk_buff *skb);
751 void (*destruct)(struct sock *sk);
752 };
753
754 /* The per-socket spinlock must be held here. */
755 #define sk_add_backlog(__sk, __skb) \
756 do { if((__sk)->backlog.tail == NULL) { \
757 (__sk)->backlog.head = \
758 (__sk)->backlog.tail = (__skb); \
759 } else { \
760 ((__sk)->backlog.tail)->next = (__skb); \
761 (__sk)->backlog.tail = (__skb); \
762 } \
763 (__skb)->next = NULL; \
764 } while(0)
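/*
 * Illustrative bottom-half receive pattern (a sketch modelled on how TCP
 * uses the backlog; my_proto_do_rcv() is a hypothetical handler): if the
 * socket is currently owned by a user context, the skb is deferred to the
 * backlog and __release_sock() later feeds it to sk->backlog_rcv:
 *
 *	bh_lock_sock(sk);
 *	if (!sk->lock.users)
 *		ret = my_proto_do_rcv(sk, skb);
 *	else
 *		sk_add_backlog(sk, skb);
 *	bh_unlock_sock(sk);
 */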
765
766 /* IP protocol blocks we attach to sockets.
767 * socket layer -> transport layer interface
768 * transport -> network interface is defined by struct inet_proto
769 */
770 struct proto {
771 void (*close)(struct sock *sk,
772 long timeout);
773 int (*connect)(struct sock *sk,
774 struct sockaddr *uaddr,
775 int addr_len);
776 int (*disconnect)(struct sock *sk, int flags);
777
778 struct sock * (*accept) (struct sock *sk, int flags, int *err);
779
780 int (*ioctl)(struct sock *sk, int cmd,
781 unsigned long arg);
782 int (*init)(struct sock *sk);
783 int (*destroy)(struct sock *sk);
784 void (*shutdown)(struct sock *sk, int how);
785 int (*setsockopt)(struct sock *sk, int level,
786 int optname, char *optval, int optlen);
787 int (*getsockopt)(struct sock *sk, int level,
788 int optname, char *optval,
789 int *option);
790 int (*sendmsg)(struct sock *sk, struct msghdr *msg,
791 int len);
792 int (*recvmsg)(struct sock *sk, struct msghdr *msg,
793 int len, int noblock, int flags,
794 int *addr_len);
795 int (*bind)(struct sock *sk,
796 struct sockaddr *uaddr, int addr_len);
797
798 int (*backlog_rcv) (struct sock *sk,
799 struct sk_buff *skb);
800
801 /* Keeping track of sk's, looking them up, and port selection methods. */
802 void (*hash)(struct sock *sk);
803 void (*unhash)(struct sock *sk);
804 int (*get_port)(struct sock *sk, unsigned short snum);
805
806 char name[32];
807
808 struct {
809 int inuse;
810 u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
811 } stats[NR_CPUS];
812 };
813
814 /* Called with local bh disabled */
815 static __inline__ void sock_prot_inc_use(struct proto *prot)
816 {
817 prot->stats[smp_processor_id()].inuse++;
818 }
819
820 static __inline__ void sock_prot_dec_use(struct proto *prot)
821 {
822 prot->stats[smp_processor_id()].inuse--;
823 }
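/*
 * Illustrative sketch (hypothetical caller): the inuse counters are
 * per-CPU and unlocked, so they are only updated with local BHs
 * disabled, typically around hashing/unhashing a socket:
 *
 *	local_bh_disable();
 *	... link sk into the protocol's lookup table ...
 *	sock_prot_inc_use(sk->prot);
 *	local_bh_enable();
 */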
824
825 /* About 10 seconds */
826 #define SOCK_DESTROY_TIME (10*HZ)
827
828 /* Sockets 0-1023 can't be bound to unless you are superuser */
829 #define PROT_SOCK 1024
830
831 #define SHUTDOWN_MASK 3
832 #define RCV_SHUTDOWN 1
833 #define SEND_SHUTDOWN 2
834
835 #define SOCK_SNDBUF_LOCK 1
836 #define SOCK_RCVBUF_LOCK 2
837 #define SOCK_BINDADDR_LOCK 4
838 #define SOCK_BINDPORT_LOCK 8
839
840
841 /* Used by processes to "lock" a socket state, so that
842 * interrupts and bottom half handlers won't change it
843 * from under us. It essentially blocks any incoming
844 * packets, so that we won't get any new data or any
845 * packets that change the state of the socket.
846 *
847 * While locked, BH processing will add new packets to
848 * the backlog queue. This queue is processed by the
849 * owner of the socket lock right before it is released.
850 *
851 * Since ~2.3.5 it is also an exclusive sleep lock serializing
852 * accesses from user process context.
853 */
854 extern void __lock_sock(struct sock *sk);
855 extern void __release_sock(struct sock *sk);
856 #define lock_sock(__sk) \
857 do { spin_lock_bh(&((__sk)->lock.slock)); \
858 if ((__sk)->lock.users != 0) \
859 __lock_sock(__sk); \
860 (__sk)->lock.users = 1; \
861 spin_unlock_bh(&((__sk)->lock.slock)); \
862 } while(0)
863
864 #define release_sock(__sk) \
865 do { spin_lock_bh(&((__sk)->lock.slock)); \
866 if ((__sk)->backlog.tail != NULL) \
867 __release_sock(__sk); \
868 (__sk)->lock.users = 0; \
869 if (waitqueue_active(&((__sk)->lock.wq))) wake_up(&((__sk)->lock.wq)); \
870 spin_unlock_bh(&((__sk)->lock.slock)); \
871 } while(0)
872
873 /* BH context may only use the following locking interface. */
874 #define bh_lock_sock(__sk) spin_lock(&((__sk)->lock.slock))
875 #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->lock.slock))
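/*
 * Typical usage (illustrative sketch): process context takes the full
 * lock and lets release_sock() drain any packets that were backlogged
 * while it was held; bottom-half code uses only the spinlock pair above
 * and must not sleep:
 *
 *	lock_sock(sk);
 *	... examine or modify socket state; BH input is backlogged ...
 *	release_sock(sk);
 */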
876
877 extern struct sock * sk_alloc(int family, int priority, int zero_it);
878 extern void sk_free(struct sock *sk);
879
880 extern struct sk_buff *sock_wmalloc(struct sock *sk,
881 unsigned long size, int force,
882 int priority);
883 extern struct sk_buff *sock_rmalloc(struct sock *sk,
884 unsigned long size, int force,
885 int priority);
886 extern void sock_wfree(struct sk_buff *skb);
887 extern void sock_rfree(struct sk_buff *skb);
888
889 extern int sock_setsockopt(struct socket *sock, int level,
890 int op, char *optval,
891 int optlen);
892
893 extern int sock_getsockopt(struct socket *sock, int level,
894 int op, char *optval,
895 int *optlen);
896 extern struct sk_buff *sock_alloc_send_skb(struct sock *sk,
897 unsigned long size,
898 int noblock,
899 int *errcode);
900 extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
901 unsigned long header_len,
902 unsigned long data_len,
903 int noblock,
904 int *errcode);
905 extern void *sock_kmalloc(struct sock *sk, int size, int priority);
906 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
907
908 /*
909 * Functions to fill in entries in struct proto_ops when a protocol
910 * does not implement a particular function.
911 */
912 extern int sock_no_release(struct socket *);
913 extern int sock_no_bind(struct socket *,
914 struct sockaddr *, int);
915 extern int sock_no_connect(struct socket *,
916 struct sockaddr *, int, int);
917 extern int sock_no_socketpair(struct socket *,
918 struct socket *);
919 extern int sock_no_accept(struct socket *,
920 struct socket *, int);
921 extern int sock_no_getname(struct socket *,
922 struct sockaddr *, int *, int);
923 extern unsigned int sock_no_poll(struct file *, struct socket *,
924 struct poll_table_struct *);
925 extern int sock_no_ioctl(struct socket *, unsigned int,
926 unsigned long);
927 extern int sock_no_listen(struct socket *, int);
928 extern int sock_no_shutdown(struct socket *, int);
929 extern int sock_no_getsockopt(struct socket *, int , int,
930 char *, int *);
931 extern int sock_no_setsockopt(struct socket *, int, int,
932 char *, int);
933 extern int sock_no_fcntl(struct socket *,
934 unsigned int, unsigned long);
935 extern int sock_no_sendmsg(struct socket *,
936 struct msghdr *, int,
937 struct scm_cookie *);
938 extern int sock_no_recvmsg(struct socket *,
939 struct msghdr *, int, int,
940 struct scm_cookie *);
941 extern int sock_no_mmap(struct file *file,
942 struct socket *sock,
943 struct vm_area_struct *vma);
944 extern ssize_t sock_no_sendpage(struct socket *sock,
945 struct page *page,
946 int offset, size_t size,
947 int flags);
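/*
 * Illustrative sketch (hypothetical protocol; member names assumed from
 * struct proto_ops in <linux/net.h>): operations a protocol does not
 * support point at the sock_no_*() stubs instead of being left NULL:
 *
 *	static struct proto_ops myproto_ops = {
 *		family:		PF_MYPROTO,
 *		release:	myproto_release,
 *		bind:		myproto_bind,
 *		connect:	sock_no_connect,
 *		socketpair:	sock_no_socketpair,
 *		accept:		sock_no_accept,
 *		getname:	myproto_getname,
 *		poll:		datagram_poll,
 *		ioctl:		myproto_ioctl,
 *		listen:		sock_no_listen,
 *		shutdown:	sock_no_shutdown,
 *		setsockopt:	sock_no_setsockopt,
 *		getsockopt:	sock_no_getsockopt,
 *		sendmsg:	myproto_sendmsg,
 *		recvmsg:	myproto_recvmsg,
 *		mmap:		sock_no_mmap,
 *		sendpage:	sock_no_sendpage,
 *	};
 */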
948
949 /*
950 * Default socket callbacks and setup code
951 */
952
953 extern void sock_def_destruct(struct sock *);
954
955 /* Initialise core socket variables */
956 extern void sock_init_data(struct socket *sock, struct sock *sk);
957
958 extern void sklist_remove_socket(struct sock **list, struct sock *sk);
959 extern void sklist_insert_socket(struct sock **list, struct sock *sk);
960 extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
961
962 #ifdef CONFIG_FILTER
963
964 /**
965 * sk_filter - run a packet through a socket filter
966 * @sk: sock associated with &sk_buff
967 * @skb: buffer to filter
968 * @needlock: set to 1 if the sock is not locked by caller.
969 *
970 * Run the filter code and then cut skb->data to the correct size returned by
971 * sk_run_filter. If pkt_len is 0 we toss the packet. If skb->len is smaller
972 * than pkt_len we keep the whole skb->data. This is the socket level
973 * wrapper to sk_run_filter. It returns 0 if the packet should
974 * be accepted or -EPERM if the packet should be tossed.
975 */
976
977 static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
978 {
979 int err = 0;
980
981 if (sk->filter) {
982 struct sk_filter *filter;
983
984 if (needlock)
985 bh_lock_sock(sk);
986
987 filter = sk->filter;
988 if (filter) {
989 int pkt_len = sk_run_filter(skb, filter->insns,
990 filter->len);
991 if (!pkt_len)
992 err = -EPERM;
993 else
994 skb_trim(skb, pkt_len);
995 }
996
997 if (needlock)
998 bh_unlock_sock(sk);
999 }
1000 return err;
1001 }
1002
1003 /**
1004 * sk_filter_release: Release a socket filter
1005 * @sk: socket
1006 * @fp: filter to remove
1007 *
1008 * Remove a filter from a socket and release its resources.
1009 */
1010
1011 static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
1012 {
1013 unsigned int size = sk_filter_len(fp);
1014
1015 atomic_sub(size, &sk->omem_alloc);
1016
1017 if (atomic_dec_and_test(&fp->refcnt))
1018 kfree(fp);
1019 }
1020
1021 static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1022 {
1023 atomic_inc(&fp->refcnt);
1024 atomic_add(sk_filter_len(fp), &sk->omem_alloc);
1025 }
1026
1027 #else
1028
1029 static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
1030 {
1031 return 0;
1032 }
1033
1034 #endif /* CONFIG_FILTER */
1035
1036 /*
1037 * Socket reference counting postulates.
1038 *
1039 * * Each user of a socket SHOULD hold a reference count.
1040 * * Each access point to a socket (a hash table bucket, a reference from a list,
1041 * a running timer, an skb in flight) MUST hold a reference count.
1042 * * When the reference count hits 0, it will never increase again.
1043 * * When the reference count hits 0, it means that no references from
1044 * outside exist to this socket and the current process on the current CPU
1045 * is the last user and may/should destroy this socket.
1046 * * sk_free is called from any context: process, BH, IRQ. When
1047 * it is called, the socket has no references from outside -> sk_free
1048 * may release descendant resources allocated by the socket, but
1049 * by the time it is called, the socket is NOT referenced by any
1050 * hash tables, lists etc.
1051 * * Packets delivered from outside (from the network or from another process)
1052 * and enqueued on receive/error queues SHOULD NOT grab a reference count
1053 * while they sit in a queue. Otherwise, packets will leak when a
1054 * socket is looked up by one CPU and unhashed by another CPU.
1055 * This is true for udp/raw, netlink (leak to receive and error queues) and tcp
1056 * (leak to backlog). Packet sockets do all their processing inside
1057 * BR_NETPROTO_LOCK, so they do not have this race condition. UNIX sockets
1058 * use a separate SMP lock, so they are not prone to it either.
1059 */
1060
1061 /* Grab socket reference count. This operation is valid only
1062 when sk is ALREADY grabbed, e.g. it has been found in a hash table
1063 or a list and the lookup was made under a lock preventing hash table
1064 modifications.
1065 */
1066
1067 static inline void sock_hold(struct sock *sk)
1068 {
1069 atomic_inc(&sk->refcnt);
1070 }
1071
1072 /* Ungrab socket in a context which assumes that the socket refcnt
1073 cannot hit zero, e.g. this is true in the context of any socketcall.
1074 */
1075 static inline void __sock_put(struct sock *sk)
1076 {
1077 atomic_dec(&sk->refcnt);
1078 }
1079
1080 /* Ungrab socket and destroy it, if it was the last reference. */
1081 static inline void sock_put(struct sock *sk)
1082 {
1083 if (atomic_dec_and_test(&sk->refcnt))
1084 sk_free(sk);
1085 }
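/*
 * Illustrative lookup pattern (sketch; the table and lock names are
 * hypothetical): a socket found in a protocol hash table is pinned with
 * sock_hold() while the bucket lock is held, used, and finally dropped
 * with sock_put(), which frees it if that was the last reference:
 *
 *	read_lock(&myproto_hash_lock);
 *	sk = ...walk the hash chain...;
 *	if (sk)
 *		sock_hold(sk);
 *	read_unlock(&myproto_hash_lock);
 *	if (sk) {
 *		...use sk...
 *		sock_put(sk);
 *	}
 */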
1086
1087 /* Detach socket from process context.
1088 * Announce socket dead, detach it from wait queue and inode.
1089 * Note that the parent inode holds a reference count on this struct sock;
1090 * we do not release it in this function, because the protocol
1091 * probably wants some additional cleanups or even to continue
1092 * working with this socket (TCP).
1093 */
1094 static inline void sock_orphan(struct sock *sk)
1095 {
1096 write_lock_bh(&sk->callback_lock);
1097 sk->dead = 1;
1098 sk->socket = NULL;
1099 sk->sleep = NULL;
1100 write_unlock_bh(&sk->callback_lock);
1101 }
1102
1103 static inline void sock_graft(struct sock *sk, struct socket *parent)
1104 {
1105 write_lock_bh(&sk->callback_lock);
1106 sk->sleep = &parent->wait;
1107 parent->sk = sk;
1108 sk->socket = parent;
1109 write_unlock_bh(&sk->callback_lock);
1110 }
1111
1112 static inline int sock_i_uid(struct sock *sk)
1113 {
1114 int uid;
1115
1116 read_lock(&sk->callback_lock);
1117 uid = sk->socket ? sk->socket->inode->i_uid : 0;
1118 read_unlock(&sk->callback_lock);
1119 return uid;
1120 }
1121
1122 static inline unsigned long sock_i_ino(struct sock *sk)
1123 {
1124 unsigned long ino;
1125
1126 read_lock(&sk->callback_lock);
1127 ino = sk->socket ? sk->socket->inode->i_ino : 0;
1128 read_unlock(&sk->callback_lock);
1129 return ino;
1130 }
1131
1132 static inline struct dst_entry *
1133 __sk_dst_get(struct sock *sk)
1134 {
1135 return sk->dst_cache;
1136 }
1137
1138 static inline struct dst_entry *
1139 sk_dst_get(struct sock *sk)
1140 {
1141 struct dst_entry *dst;
1142
1143 read_lock(&sk->dst_lock);
1144 dst = sk->dst_cache;
1145 if (dst)
1146 dst_hold(dst);
1147 read_unlock(&sk->dst_lock);
1148 return dst;
1149 }
1150
1151 static inline void
1152 __sk_dst_set(struct sock *sk, struct dst_entry *dst)
1153 {
1154 struct dst_entry *old_dst;
1155
1156 old_dst = sk->dst_cache;
1157 sk->dst_cache = dst;
1158 dst_release(old_dst);
1159 }
1160
1161 static inline void
1162 sk_dst_set(struct sock *sk, struct dst_entry *dst)
1163 {
1164 write_lock(&sk->dst_lock);
1165 __sk_dst_set(sk, dst);
1166 write_unlock(&sk->dst_lock);
1167 }
1168
1169 static inline void
1170 __sk_dst_reset(struct sock *sk)
1171 {
1172 struct dst_entry *old_dst;
1173
1174 old_dst = sk->dst_cache;
1175 sk->dst_cache = NULL;
1176 dst_release(old_dst);
1177 }
1178
1179 static inline void
1180 sk_dst_reset(struct sock *sk)
1181 {
1182 write_lock(&sk->dst_lock);
1183 __sk_dst_reset(sk);
1184 write_unlock(&sk->dst_lock);
1185 }
1186
1187 static inline struct dst_entry *
1188 __sk_dst_check(struct sock *sk, u32 cookie)
1189 {
1190 struct dst_entry *dst = sk->dst_cache;
1191
1192 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1193 sk->dst_cache = NULL;
1194 return NULL;
1195 }
1196
1197 return dst;
1198 }
1199
1200 static inline struct dst_entry *
1201 sk_dst_check(struct sock *sk, u32 cookie)
1202 {
1203 struct dst_entry *dst = sk_dst_get(sk);
1204
1205 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1206 sk_dst_reset(sk);
1207 return NULL;
1208 }
1209
1210 return dst;
1211 }
1212
1213
1214 /*
1215 * Queue a received datagram if it will fit. Stream and sequenced
1216 * protocols can't normally use this as they need to fit buffers in
1217 * and play with them.
1218 *
1219 * Inlined as it's very short and called for pretty much every
1220 * packet ever received.
1221 */
1222
1223 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1224 {
1225 sock_hold(sk);
1226 skb->sk = sk;
1227 skb->destructor = sock_wfree;
1228 atomic_add(skb->truesize, &sk->wmem_alloc);
1229 }
1230
1231 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
1232 {
1233 skb->sk = sk;
1234 skb->destructor = sock_rfree;
1235 atomic_add(skb->truesize, &sk->rmem_alloc);
1236 }
1237
1238 static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1239 {
1240 int err = 0;
1241 int skb_len;
1242
1243 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
1244 number of warnings when compiling with -W --ANK
1245 */
1246 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) {
1247 err = -ENOMEM;
1248 goto out;
1249 }
1250
1251 /* It would be deadlock, if sock_queue_rcv_skb is used
1252 with socket lock! We assume that users of this
1253 function are lock free.
1254 */
1255 err = sk_filter(sk, skb, 1);
1256 if (err)
1257 goto out;
1258
1259 skb->dev = NULL;
1260 skb_set_owner_r(skb, sk);
1261
1262 /* Cache the SKB length before we tack it onto the receive
1263 * queue. Once it is added it no longer belongs to us and
1264 * may be freed by other threads of control pulling packets
1265 * from the queue.
1266 */
1267 skb_len = skb->len;
1268
1269 skb_queue_tail(&sk->receive_queue, skb);
1270 if (!sk->dead)
1271 sk->data_ready(sk,skb_len);
1272 out:
1273 return err;
1274 }
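/*
 * Illustrative caller (sketch, as in datagram-style *_rcv paths): the
 * queueing helpers do not free the skb on failure, so the caller must:
 *
 *	if (sock_queue_rcv_skb(sk, skb) < 0) {
 *		kfree_skb(skb);
 *		return -1;
 *	}
 *	return 0;
 */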
1275
1276 static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
1277 {
1278 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
1279 number of warnings when compiling with -W --ANK
1280 */
1281 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
1282 return -ENOMEM;
1283 skb_set_owner_r(skb, sk);
1284 skb_queue_tail(&sk->error_queue,skb);
1285 if (!sk->dead)
1286 sk->data_ready(sk,skb->len);
1287 return 0;
1288 }
1289
1290 /*
1291 * Recover an error report and clear atomically
1292 */
1293
1294 static inline int sock_error(struct sock *sk)
1295 {
1296 int err=xchg(&sk->err,0);
1297 return -err;
1298 }
1299
1300 static inline unsigned long sock_wspace(struct sock *sk)
1301 {
1302 int amt = 0;
1303
1304 if (!(sk->shutdown & SEND_SHUTDOWN)) {
1305 amt = sk->sndbuf - atomic_read(&sk->wmem_alloc);
1306 if (amt < 0)
1307 amt = 0;
1308 }
1309 return amt;
1310 }
1311
1312 static inline void sk_wake_async(struct sock *sk, int how, int band)
1313 {
1314 if (sk->socket && sk->socket->fasync_list)
1315 sock_wake_async(sk->socket, how, band);
1316 }
1317
1318 #define SOCK_MIN_SNDBUF 2048
1319 #define SOCK_MIN_RCVBUF 256
1320
1321 /*
1322 * Default write policy as shown to user space via poll/select/SIGIO
1323 */
1324 static inline int sock_writeable(struct sock *sk)
1325 {
1326 return atomic_read(&sk->wmem_alloc) < (sk->sndbuf / 2);
1327 }
1328
1329 static inline int gfp_any(void)
1330 {
1331 return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
1332 }
1333
1334 static inline long sock_rcvtimeo(struct sock *sk, int noblock)
1335 {
1336 return noblock ? 0 : sk->rcvtimeo;
1337 }
1338
1339 static inline long sock_sndtimeo(struct sock *sk, int noblock)
1340 {
1341 return noblock ? 0 : sk->sndtimeo;
1342 }
1343
1344 static inline int sock_rcvlowat(struct sock *sk, int waitall, int len)
1345 {
1346 return (waitall ? len : min_t(int, sk->rcvlowat, len)) ? : 1;
1347 }
1348
1349 /* Alas, socket operations with a timeout are not restartable.
1350 * Compare this to poll().
1351 */
1352 static inline int sock_intr_errno(long timeo)
1353 {
1354 return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
1355 }
1356
1357 static __inline__ void
1358 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1359 {
1360 if (sk->rcvtstamp)
1361 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(skb->stamp), &skb->stamp);
1362 else
1363 sk->stamp = skb->stamp;
1364 }
1365
1366 /*
1367 * Enable debug/info messages
1368 */
1369
1370 #if 0
1371 #define NETDEBUG(x) do { } while (0)
1372 #else
1373 #define NETDEBUG(x) do { x; } while (0)
1374 #endif
1375
1376 /*
1377 * Macros for sleeping on a socket. Use them like this:
1378 *
1379 * SOCK_SLEEP_PRE(sk)
1380 * if (condition)
1381 * schedule();
1382 * SOCK_SLEEP_POST(sk)
1383 *
1384 */
1385
1386 #define SOCK_SLEEP_PRE(sk) { struct task_struct *tsk = current; \
1387 DECLARE_WAITQUEUE(wait, tsk); \
1388 tsk->state = TASK_INTERRUPTIBLE; \
1389 add_wait_queue((sk)->sleep, &wait); \
1390 release_sock(sk);
1391
1392 #define SOCK_SLEEP_POST(sk) tsk->state = TASK_RUNNING; \
1393 remove_wait_queue((sk)->sleep, &wait); \
1394 lock_sock(sk); \
1395 }
1396
1397 extern __u32 sysctl_wmem_max;
1398 extern __u32 sysctl_rmem_max;
1399
1400 #endif /* _SOCK_H */
1401