1 /*
2  * linux/net/sunrpc/svcsock.c
3  *
4  * These are the RPC server socket internals.
5  *
6  * The server scheduling algorithm does not always distribute the load
7  * evenly when servicing a single client. May need to modify the
8  * svc_sock_enqueue procedure...
9  *
10  * TCP support is largely untested and may be a little slow. The problem
11  * is that we currently do two separate recvfrom's, one for the 4-byte
12  * record length, and the second for the actual record. This could possibly
13  * be improved by always reading a minimum size of around 100 bytes and
14  * tucking any superfluous bytes away in a temporary store. Still, that
15  * leaves write requests out in the rain. An alternative may be to peek at
16  * the first skb in the queue, and if it matches the next TCP sequence
17  * number, to extract the record marker. Yuck.
18  *
19  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
20  */
21 
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/fcntl.h>
25 #include <linux/net.h>
26 #include <linux/in.h>
27 #include <linux/inet.h>
28 #include <linux/udp.h>
29 #include <linux/version.h>
30 #include <linux/unistd.h>
31 #include <linux/slab.h>
32 #include <linux/netdevice.h>
33 #include <linux/skbuff.h>
34 #include <net/sock.h>
35 #include <net/checksum.h>
36 #include <net/ip.h>
37 #include <asm/uaccess.h>
38 #include <asm/ioctls.h>
39 
40 #include <linux/sunrpc/types.h>
41 #include <linux/sunrpc/xdr.h>
42 #include <linux/sunrpc/svcsock.h>
43 #include <linux/sunrpc/stats.h>
44 
/* SMP locking strategy:
 *
 * 	svc_serv->sv_lock protects most stuff for that service.
 *
 *	Some flags can be set to certain values at any time
 *	providing that certain rules are followed:
 *
 *	SK_BUSY  can be set to 0 at any time.
 *		svc_sock_enqueue must be called afterwards
 *	SK_CONN, SK_DATA, can be set or cleared at any time.
 *		after a set, svc_sock_enqueue must be called.
 *		after a clear, the socket must be read/accepted
 *		 if this succeeds, it must be set again.
 *	SK_CLOSE can be set at any time. It is never cleared.
 *
 */
61 
62 #define RPCDBG_FACILITY	RPCDBG_SVCSOCK
63 
64 
/* Forward declarations of static helpers that are used before they are defined. */
static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
					 struct socket *sock,
					 int *errp, int pmap_reg);
static void		svc_udp_data_ready(struct sock *sk, int count);
static int		svc_udp_recvfrom(struct svc_rqst *rqstp);
static int		svc_udp_sendto(struct svc_rqst *rqstp);
70 
71 
72 /*
73  * Queue up an idle server thread.  Must have serv->sv_lock held.
74  * Note: this is really a stack rather than a queue, so that we only
75  * use as many different threads as we need, and the rest don't polute
76  * the cache.
77  */
78 static inline void
svc_serv_enqueue(struct svc_serv * serv,struct svc_rqst * rqstp)79 svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
80 {
81 	list_add(&rqstp->rq_list, &serv->sv_threads);
82 }
83 
84 /*
85  * Dequeue an nfsd thread.  Must have serv->sv_lock held.
86  */
87 static inline void
svc_serv_dequeue(struct svc_serv * serv,struct svc_rqst * rqstp)88 svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
89 {
90 	list_del(&rqstp->rq_list);
91 }
92 
93 /*
94  * Release an skbuff after use
95  */
96 static inline void
svc_release_skb(struct svc_rqst * rqstp)97 svc_release_skb(struct svc_rqst *rqstp)
98 {
99 	struct sk_buff *skb = rqstp->rq_skbuff;
100 
101 	if (!skb)
102 		return;
103 	rqstp->rq_skbuff = NULL;
104 
105 	dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
106 	skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
107 }
108 
109 /*
110  * Queue up a socket with data pending. If there are idle nfsd
111  * processes, wake 'em up.
112  *
113  */
114 static void
svc_sock_enqueue(struct svc_sock * svsk)115 svc_sock_enqueue(struct svc_sock *svsk)
116 {
117 	struct svc_serv	*serv = svsk->sk_server;
118 	struct svc_rqst	*rqstp;
119 
120 	if (!(svsk->sk_flags &
121 	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)) ))
122 		return;
123 	if (test_bit(SK_DEAD, &svsk->sk_flags))
124 		return;
125 
126 	spin_lock_bh(&serv->sv_lock);
127 
128 	if (!list_empty(&serv->sv_threads) &&
129 	    !list_empty(&serv->sv_sockets))
130 		printk(KERN_ERR
131 			"svc_sock_enqueue: threads and sockets both waiting??\n");
132 
133 	if (test_bit(SK_BUSY, &svsk->sk_flags)) {
134 		/* Don't enqueue socket while daemon is receiving */
135 		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
136 		goto out_unlock;
137 	}
138 
139 	if (((svsk->sk_reserved + serv->sv_bufsz)*2
140 	     > sock_wspace(svsk->sk_sk))
141 	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
142 	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
143 		/* Don't enqueue while not enough space for reply */
144 		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
145 			svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
146 			sock_wspace(svsk->sk_sk));
147 		goto out_unlock;
148 	}
149 
150 	/* Mark socket as busy. It will remain in this state until the
151 	 * server has processed all pending data and put the socket back
152 	 * on the idle list.
153 	 */
154 	set_bit(SK_BUSY, &svsk->sk_flags);
155 
156 	if (!list_empty(&serv->sv_threads)) {
157 		rqstp = list_entry(serv->sv_threads.next,
158 				   struct svc_rqst,
159 				   rq_list);
160 		dprintk("svc: socket %p served by daemon %p\n",
161 			svsk->sk_sk, rqstp);
162 		svc_serv_dequeue(serv, rqstp);
163 		if (rqstp->rq_sock)
164 			printk(KERN_ERR
165 				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
166 				rqstp, rqstp->rq_sock);
167 		rqstp->rq_sock = svsk;
168 		svsk->sk_inuse++;
169 		rqstp->rq_reserved = serv->sv_bufsz;
170 		svsk->sk_reserved += rqstp->rq_reserved;
171 		wake_up(&rqstp->rq_wait);
172 	} else {
173 		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
174 		list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
175 		set_bit(SK_QUED, &svsk->sk_flags);
176 	}
177 
178 out_unlock:
179 	spin_unlock_bh(&serv->sv_lock);
180 }
181 
182 /*
183  * Dequeue the first socket.  Must be called with the serv->sv_lock held.
184  */
185 static inline struct svc_sock *
svc_sock_dequeue(struct svc_serv * serv)186 svc_sock_dequeue(struct svc_serv *serv)
187 {
188 	struct svc_sock	*svsk;
189 
190 	if (list_empty(&serv->sv_sockets))
191 		return NULL;
192 
193 	svsk = list_entry(serv->sv_sockets.next,
194 			  struct svc_sock, sk_ready);
195 	list_del(&svsk->sk_ready);
196 
197 	dprintk("svc: socket %p dequeued, inuse=%d\n",
198 		svsk->sk_sk, svsk->sk_inuse);
199 	clear_bit(SK_QUED, &svsk->sk_flags);
200 
201 	return svsk;
202 }
203 
204 /*
205  * Having read something from a socket, check whether it
206  * needs to be re-enqueued.
207  * Note: SK_DATA only gets cleared when a read-attempt finds
208  * no (or insufficient) data.
209  */
210 static inline void
svc_sock_received(struct svc_sock * svsk)211 svc_sock_received(struct svc_sock *svsk)
212 {
213 	clear_bit(SK_BUSY, &svsk->sk_flags);
214 	svc_sock_enqueue(svsk);
215 }
216 
217 
218 /**
219  * svc_reserve - change the space reserved for the reply to a request.
220  * @rqstp:  The request in question
221  * @space: new max space to reserve
222  *
223  * Each request reserves some space on the output queue of the socket
224  * to make sure the reply fits.  This function reduces that reserved
225  * space to be the amount of space used already, plus @space.
226  *
227  */
svc_reserve(struct svc_rqst * rqstp,int space)228 void svc_reserve(struct svc_rqst *rqstp, int space)
229 {
230 	space += rqstp->rq_resbuf.len<<2;
231 
232 	if (space < rqstp->rq_reserved) {
233 		struct svc_sock *svsk = rqstp->rq_sock;
234 		spin_lock_bh(&svsk->sk_server->sv_lock);
235 		svsk->sk_reserved -= (rqstp->rq_reserved - space);
236 		rqstp->rq_reserved = space;
237 		spin_unlock_bh(&svsk->sk_server->sv_lock);
238 
239 		svc_sock_enqueue(svsk);
240 	}
241 }
242 
243 /*
244  * Release a socket after use.
245  */
246 static inline void
svc_sock_put(struct svc_sock * svsk)247 svc_sock_put(struct svc_sock *svsk)
248 {
249 	struct svc_serv *serv = svsk->sk_server;
250 
251 	spin_lock_bh(&serv->sv_lock);
252 	if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
253 		spin_unlock_bh(&serv->sv_lock);
254 		dprintk("svc: releasing dead socket\n");
255 		sock_release(svsk->sk_sock);
256 		kfree(svsk);
257 	}
258 	else
259 		spin_unlock_bh(&serv->sv_lock);
260 }
261 
262 static void
svc_sock_release(struct svc_rqst * rqstp)263 svc_sock_release(struct svc_rqst *rqstp)
264 {
265 	struct svc_sock	*svsk = rqstp->rq_sock;
266 
267 	svc_release_skb(rqstp);
268 
269 	/* Reset response buffer and release
270 	 * the reservation.
271 	 * But first, check that enough space was reserved
272 	 * for the reply, otherwise we have a bug!
273 	 */
274 	if ((rqstp->rq_resbuf.len<<2) >  rqstp->rq_reserved)
275 		printk(KERN_ERR "RPC request reserved %d but used %d\n",
276 		       rqstp->rq_reserved,
277 		       rqstp->rq_resbuf.len<<2);
278 
279 	rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base;
280 	rqstp->rq_resbuf.len = 0;
281 	svc_reserve(rqstp, 0);
282 	rqstp->rq_sock = NULL;
283 
284 	svc_sock_put(svsk);
285 }
286 
287 /*
288  * External function to wake up a server waiting for data
289  */
290 void
svc_wake_up(struct svc_serv * serv)291 svc_wake_up(struct svc_serv *serv)
292 {
293 	struct svc_rqst	*rqstp;
294 
295 	spin_lock_bh(&serv->sv_lock);
296 	if (!list_empty(&serv->sv_threads)) {
297 		rqstp = list_entry(serv->sv_threads.next,
298 				   struct svc_rqst,
299 				   rq_list);
300 		dprintk("svc: daemon %p woken up.\n", rqstp);
301 		/*
302 		svc_serv_dequeue(serv, rqstp);
303 		rqstp->rq_sock = NULL;
304 		 */
305 		wake_up(&rqstp->rq_wait);
306 	}
307 	spin_unlock_bh(&serv->sv_lock);
308 }
309 
310 /*
311  * Generic sendto routine
312  */
313 static int
svc_sendto(struct svc_rqst * rqstp,struct iovec * iov,int nr)314 svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr)
315 {
316 	mm_segment_t	oldfs;
317 	struct svc_sock	*svsk = rqstp->rq_sock;
318 	struct socket	*sock = svsk->sk_sock;
319 	struct msghdr	msg;
320 	char 		buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
321 	struct cmsghdr *cmh = (struct cmsghdr *)buffer;
322 	struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh);
323 	int		i, buflen, len;
324 
325 	for (i = buflen = 0; i < nr; i++)
326 		buflen += iov[i].iov_len;
327 
328 	msg.msg_name    = &rqstp->rq_addr;
329 	msg.msg_namelen = sizeof(rqstp->rq_addr);
330 	msg.msg_iov     = iov;
331 	msg.msg_iovlen  = nr;
332 	if (rqstp->rq_prot == IPPROTO_UDP) {
333 		msg.msg_control = cmh;
334 		msg.msg_controllen = sizeof(buffer);
335 		cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
336 		cmh->cmsg_level = SOL_IP;
337 		cmh->cmsg_type = IP_PKTINFO;
338 		pki->ipi_ifindex = 0;
339 		pki->ipi_spec_dst.s_addr = rqstp->rq_daddr;
340 	} else {
341 		msg.msg_control = NULL;
342 		msg.msg_controllen = 0;
343 	}
344 
345 	/* This was MSG_DONTWAIT, but I now want it to wait.
346 	 * The only thing that it would wait for is memory and
347 	 * if we are fairly low on memory, then we aren't likely
348 	 * to make much progress anyway.
349 	 * sk->sndtimeo is set to 30seconds just in case.
350 	 */
351 	msg.msg_flags	= 0;
352 
353 	oldfs = get_fs(); set_fs(KERNEL_DS);
354 	len = sock_sendmsg(sock, &msg, buflen);
355 	set_fs(oldfs);
356 
357 	dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n",
358 			rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len);
359 
360 	return len;
361 }
362 
363 /*
364  * Check input queue length
365  */
366 static int
svc_recv_available(struct svc_sock * svsk)367 svc_recv_available(struct svc_sock *svsk)
368 {
369 	mm_segment_t	oldfs;
370 	struct socket	*sock = svsk->sk_sock;
371 	int		avail, err;
372 
373 	oldfs = get_fs(); set_fs(KERNEL_DS);
374 	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
375 	set_fs(oldfs);
376 
377 	return (err >= 0)? avail : err;
378 }
379 
380 /*
381  * Generic recvfrom routine.
382  */
383 static int
svc_recvfrom(struct svc_rqst * rqstp,struct iovec * iov,int nr,int buflen)384 svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen)
385 {
386 	mm_segment_t	oldfs;
387 	struct msghdr	msg;
388 	struct socket	*sock;
389 	int		len, alen;
390 
391 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
392 	sock = rqstp->rq_sock->sk_sock;
393 
394 	msg.msg_name    = &rqstp->rq_addr;
395 	msg.msg_namelen = sizeof(rqstp->rq_addr);
396 	msg.msg_iov     = iov;
397 	msg.msg_iovlen  = nr;
398 	msg.msg_control = NULL;
399 	msg.msg_controllen = 0;
400 
401 	msg.msg_flags	= MSG_DONTWAIT;
402 
403 	oldfs = get_fs(); set_fs(KERNEL_DS);
404 	len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT);
405 	set_fs(oldfs);
406 
407 	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
408 	 * possibly we should cache this in the svc_sock structure
409 	 * at accept time. FIXME
410 	 */
411 	alen = sizeof(rqstp->rq_addr);
412 	sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
413 
414 	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
415 		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
416 
417 	return len;
418 }
419 
420 /*
421  * Set socket snd and rcv buffer lengths
422  */
423 static inline void
svc_sock_setbufsize(struct socket * sock,unsigned int snd,unsigned int rcv)424 svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
425 {
426 #if 0
427 	mm_segment_t	oldfs;
428 	oldfs = get_fs(); set_fs(KERNEL_DS);
429 	sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
430 			(char*)&snd, sizeof(snd));
431 	sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
432 			(char*)&rcv, sizeof(rcv));
433 #else
434 	/* sock_setsockopt limits use to sysctl_?mem_max,
435 	 * which isn't acceptable.  Until that is made conditional
436 	 * on not having CAP_SYS_RESOURCE or similar, we go direct...
437 	 * DaveM said I could!
438 	 */
439 	lock_sock(sock->sk);
440 	sock->sk->sndbuf = snd * 2;
441 	sock->sk->rcvbuf = rcv * 2;
442 	sock->sk->userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
443 	release_sock(sock->sk);
444 #endif
445 }
446 /*
447  * INET callback when data has been received on the socket.
448  */
449 static void
svc_udp_data_ready(struct sock * sk,int count)450 svc_udp_data_ready(struct sock *sk, int count)
451 {
452 	struct svc_sock	*svsk = (struct svc_sock *)(sk->user_data);
453 
454 	if (!svsk)
455 		goto out;
456 	dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
457 		svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags));
458 	set_bit(SK_DATA, &svsk->sk_flags);
459 	svc_sock_enqueue(svsk);
460  out:
461 	if (sk->sleep && waitqueue_active(sk->sleep))
462 		wake_up_interruptible(sk->sleep);
463 }
464 
465 /*
466  * INET callback when space is newly available on the socket.
467  */
468 static void
svc_write_space(struct sock * sk)469 svc_write_space(struct sock *sk)
470 {
471 	struct svc_sock	*svsk = (struct svc_sock *)(sk->user_data);
472 
473 	if (svsk) {
474 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
475 			svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags));
476 		svc_sock_enqueue(svsk);
477 	}
478 
479 	if (sk->sleep && waitqueue_active(sk->sleep))
480 		wake_up_interruptible(sk->sleep);
481 }
482 
483 /*
484  * Receive a datagram from a UDP socket.
485  */
486 static int
svc_udp_recvfrom(struct svc_rqst * rqstp)487 svc_udp_recvfrom(struct svc_rqst *rqstp)
488 {
489 	struct svc_sock	*svsk = rqstp->rq_sock;
490 	struct svc_serv	*serv = svsk->sk_server;
491 	struct sk_buff	*skb;
492 	u32		*data;
493 	int		err, len;
494 
495 	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
496 		/* udp sockets need large rcvbuf as all pending
497 		 * requests are still in that buffer.  sndbuf must
498 		 * also be large enough that there is enough space
499 		 * for one reply per thread.
500 		 */
501 		svc_sock_setbufsize(svsk->sk_sock,
502 				    (serv->sv_nrthreads+3)* serv->sv_bufsz,
503 				    (serv->sv_nrthreads+3)* serv->sv_bufsz);
504 
505 	clear_bit(SK_DATA, &svsk->sk_flags);
506 	while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
507 		svc_sock_received(svsk);
508 		if (err == -EAGAIN)
509 			return err;
510 		/* possibly an icmp error */
511 		dprintk("svc: recvfrom returned error %d\n", -err);
512 	}
513 	set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
514 
515 	/* Sorry. */
516 	if (skb_is_nonlinear(skb)) {
517 		if (skb_linearize(skb, GFP_KERNEL) != 0) {
518 			kfree_skb(skb);
519 			svc_sock_received(svsk);
520 			return 0;
521 		}
522 	}
523 
524 	if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
525 		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
526 			skb_free_datagram(svsk->sk_sk, skb);
527 			svc_sock_received(svsk);
528 			return 0;
529 		}
530 	}
531 
532 
533 	len  = skb->len - sizeof(struct udphdr);
534 	data = (u32 *) (skb->data + sizeof(struct udphdr));
535 
536 	rqstp->rq_skbuff      = skb;
537 	rqstp->rq_argbuf.base = data;
538 	rqstp->rq_argbuf.buf  = data;
539 	rqstp->rq_argbuf.len  = (len >> 2);
540 	/* rqstp->rq_resbuf      = rqstp->rq_defbuf; */
541 	rqstp->rq_prot        = IPPROTO_UDP;
542 
543 	/* Get sender address */
544 	rqstp->rq_addr.sin_family = AF_INET;
545 	rqstp->rq_addr.sin_port = skb->h.uh->source;
546 	rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
547 	rqstp->rq_daddr = skb->nh.iph->daddr;
548 
549 	if (serv->sv_stats)
550 		serv->sv_stats->netudpcnt++;
551 
552 	/* One down, maybe more to go... */
553 	svsk->sk_sk->stamp = skb->stamp;
554 	svc_sock_received(svsk);
555 
556 	return len;
557 }
558 
/*
 * Send the reply held in rqstp->rq_resbuf over UDP.
 *
 * Only the first iovec element is set up here; any further elements
 * have been taken care of by the server implementation itself.
 * A send that fails with -ECONNREFUSED (an ICMP error left over from
 * an earlier request) is retried once.  Returns the result of the
 * last svc_sendto() call.
 */
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
	struct svc_buf	*reply = &rqstp->rq_resbuf;
	int		rc;

	reply->iov[0].iov_base = reply->base;
	reply->iov[0].iov_len  = reply->len << 2;

	rc = svc_sendto(rqstp, reply->iov, reply->nriov);
	if (rc != -ECONNREFUSED)
		return rc;

	/* ICMP error on earlier request: try once more. */
	return svc_sendto(rqstp, reply->iov, reply->nriov);
}
580 
581 static int
svc_udp_init(struct svc_sock * svsk)582 svc_udp_init(struct svc_sock *svsk)
583 {
584 	svsk->sk_sk->data_ready = svc_udp_data_ready;
585 	svsk->sk_sk->write_space = svc_write_space;
586 	svsk->sk_recvfrom = svc_udp_recvfrom;
587 	svsk->sk_sendto = svc_udp_sendto;
588 
589 	/* initialise setting must have enough space to
590 	 * receive and respond to one request.
591 	 * svc_udp_recvfrom will re-adjust if necessary
592 	 */
593 	svc_sock_setbufsize(svsk->sk_sock,
594 			    3 * svsk->sk_server->sv_bufsz,
595 			    3 * svsk->sk_server->sv_bufsz);
596 
597 	set_bit(SK_CHNGBUF, &svsk->sk_flags);
598 
599 	return 0;
600 }
601 
602 /*
603  * A data_ready event on a listening socket means there's a connection
604  * pending. Do not use state_change as a substitute for it.
605  */
606 static void
svc_tcp_listen_data_ready(struct sock * sk,int count_unused)607 svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
608 {
609 	struct svc_sock	*svsk;
610 
611 	dprintk("svc: socket %p TCP (listen) state change %d\n",
612 			sk, sk->state);
613 
614 	if  (sk->state != TCP_LISTEN) {
615 		/*
616 		 * This callback may called twice when a new connection
617 		 * is established as a child socket inherits everything
618 		 * from a parent LISTEN socket.
619 		 * 1) data_ready method of the parent socket will be called
620 		 *    when one of child sockets become ESTABLISHED.
621 		 * 2) data_ready method of the child socket may be called
622 		 *    when it receives data before the socket is accepted.
623 		 * In case of 2, we should ignore it silently.
624 		 */
625 		goto out;
626 	}
627 	if (!(svsk = (struct svc_sock *) sk->user_data)) {
628 		printk("svc: socket %p: no user data\n", sk);
629 		goto out;
630 	}
631 	set_bit(SK_CONN, &svsk->sk_flags);
632 	svc_sock_enqueue(svsk);
633  out:
634 	if (sk->sleep && waitqueue_active(sk->sleep))
635 		wake_up_interruptible_all(sk->sleep);
636 }
637 
638 /*
639  * A state change on a connected socket means it's dying or dead.
640  */
641 static void
svc_tcp_state_change(struct sock * sk)642 svc_tcp_state_change(struct sock *sk)
643 {
644 	struct svc_sock	*svsk;
645 
646 	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
647 			sk, sk->state, sk->user_data);
648 
649 	if (!(svsk = (struct svc_sock *) sk->user_data)) {
650 		printk("svc: socket %p: no user data\n", sk);
651 		goto out;
652 	}
653 	set_bit(SK_CLOSE, &svsk->sk_flags);
654 	svc_sock_enqueue(svsk);
655  out:
656 	if (sk->sleep && waitqueue_active(sk->sleep))
657 		wake_up_interruptible_all(sk->sleep);
658 }
659 
660 static void
svc_tcp_data_ready(struct sock * sk,int count)661 svc_tcp_data_ready(struct sock *sk, int count)
662 {
663 	struct svc_sock *	svsk;
664 
665 	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
666 			sk, sk->user_data);
667 	if (!(svsk = (struct svc_sock *)(sk->user_data)))
668 		goto out;
669 	set_bit(SK_DATA, &svsk->sk_flags);
670 	svc_sock_enqueue(svsk);
671  out:
672 	if (sk->sleep && waitqueue_active(sk->sleep))
673 		wake_up_interruptible(sk->sleep);
674 }
675 
676 /*
677  * Accept a TCP connection
678  */
679 static void
svc_tcp_accept(struct svc_sock * svsk)680 svc_tcp_accept(struct svc_sock *svsk)
681 {
682 	struct sockaddr_in sin;
683 	struct svc_serv	*serv = svsk->sk_server;
684 	struct socket	*sock = svsk->sk_sock;
685 	struct socket	*newsock;
686 	struct proto_ops *ops;
687 	struct svc_sock	*newsvsk;
688 	int		err, slen;
689 
690 	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
691 	if (!sock)
692 		return;
693 
694 	if (!(newsock = sock_alloc())) {
695 		printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name);
696 		return;
697 	}
698 	dprintk("svc: tcp_accept %p allocated\n", newsock);
699 
700 	newsock->type = sock->type;
701 	newsock->ops = ops = sock->ops;
702 
703 	clear_bit(SK_CONN, &svsk->sk_flags);
704 	if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
705 		if (err != -EAGAIN && net_ratelimit())
706 			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
707 				   serv->sv_name, -err);
708 		goto failed;		/* aborted connection or whatever */
709 	}
710 	set_bit(SK_CONN, &svsk->sk_flags);
711 	svc_sock_enqueue(svsk);
712 
713 	slen = sizeof(sin);
714 	err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
715 	if (err < 0) {
716 		if (net_ratelimit())
717 			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
718 				   serv->sv_name, -err);
719 		goto failed;		/* aborted connection or whatever */
720 	}
721 
722 	/* Ideally, we would want to reject connections from unauthorized
723 	 * hosts here, but when we get encription, the IP of the host won't
724 	 * tell us anything. For now just warn about unpriv connections.
725 	 */
726 	if (ntohs(sin.sin_port) >= 1024) {
727 		dprintk(KERN_WARNING
728 			"%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
729 			serv->sv_name,
730 			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
731 	}
732 
733 	dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
734 			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
735 
736 	if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))
737 		goto failed;
738 
739 	/* make sure that a write doesn't block forever when
740 	 * low on memory
741 	 */
742 	newsock->sk->sndtimeo = HZ*30;
743 
744 	/* Precharge. Data may have arrived on the socket before we
745 	 * installed the data_ready callback.
746 	 */
747 	set_bit(SK_DATA, &newsvsk->sk_flags);
748 	svc_sock_enqueue(newsvsk);
749 
750 	/* make sure that we don't have too many active connections.
751 	 * If we have, something must be dropped.
752 	 * We randomly choose between newest and oldest (in terms
753 	 * of recent activity) and drop it.
754 	 */
755 	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*10) {
756 		struct svc_sock *svsk = NULL;
757 		spin_lock_bh(&serv->sv_lock);
758 		if (!list_empty(&serv->sv_tempsocks)) {
759 			if (net_random()&1)
760 				svsk = list_entry(serv->sv_tempsocks.prev,
761 						  struct svc_sock,
762 						  sk_list);
763 			else
764 				svsk = list_entry(serv->sv_tempsocks.next,
765 						  struct svc_sock,
766 						  sk_list);
767 			set_bit(SK_CLOSE, &svsk->sk_flags);
768 			svsk->sk_inuse ++;
769 		}
770 		spin_unlock_bh(&serv->sv_lock);
771 
772 		if (svsk) {
773 			svc_sock_enqueue(svsk);
774 			svc_sock_put(svsk);
775 		}
776 
777 	}
778 
779 	if (serv->sv_stats)
780 		serv->sv_stats->nettcpconn++;
781 
782 	return;
783 
784 failed:
785 	sock_release(newsock);
786 	return;
787 }
788 
789 /*
790  * Receive data from a TCP socket.
791  */
792 static int
svc_tcp_recvfrom(struct svc_rqst * rqstp)793 svc_tcp_recvfrom(struct svc_rqst *rqstp)
794 {
795 	struct svc_sock	*svsk = rqstp->rq_sock;
796 	struct svc_serv	*serv = svsk->sk_server;
797 	struct svc_buf	*bufp = &rqstp->rq_argbuf;
798 	int		len;
799 
800 	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
801 		svsk, test_bit(SK_DATA, &svsk->sk_flags),
802 		test_bit(SK_CONN, &svsk->sk_flags),
803 		test_bit(SK_CLOSE, &svsk->sk_flags));
804 
805 	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
806 		svc_delete_socket(svsk);
807 		return 0;
808 	}
809 
810 	if (svsk->sk_sk->state == TCP_LISTEN) {
811 		svc_tcp_accept(svsk);
812 		svc_sock_received(svsk);
813 		return 0;
814 	}
815 
816 	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
817 		/* sndbuf needs to have room for one request
818 		 * per thread, otherwise we can stall even when the
819 		 * network isn't a bottleneck.
820 		 * rcvbuf just needs to be able to hold a few requests.
821 		 * Normally they will be removed from the queue
822 		 * as soon as a complete request arrives.
823 		 */
824 		svc_sock_setbufsize(svsk->sk_sock,
825 				    (serv->sv_nrthreads+3) *
826 				    serv->sv_bufsz,
827 				    3 * serv->sv_bufsz);
828 
829 	clear_bit(SK_DATA, &svsk->sk_flags);
830 
831 	/* Receive data. If we haven't got the record length yet, get
832 	 * the next four bytes. Otherwise try to gobble up as much as
833 	 * possible up to the complete record length.
834 	 */
835 	if (svsk->sk_tcplen < 4) {
836 		unsigned long	want = 4 - svsk->sk_tcplen;
837 		struct iovec	iov;
838 
839 		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
840 		iov.iov_len  = want;
841 		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
842 			goto error;
843 		svsk->sk_tcplen += len;
844 		if (len < want) {
845 			dprintk("svc: short recvfrom while reading record length (%d of %ld)\n",
846 			        len, want);
847 			svc_sock_received(svsk);
848 			return -EAGAIN; /* record header not complete */
849 		}
850 
851 		svsk->sk_reclen = ntohl(svsk->sk_reclen);
852 		if (!(svsk->sk_reclen & 0x80000000)) {
853 			/* FIXME: technically, a record can be fragmented,
854 			 *  and non-terminal fragments will not have the top
855 			 *  bit set in the fragment length header.
856 			 *  But apparently no known nfs clients send fragmented
857 			 *  records. */
858 			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",
859 			       (unsigned long) svsk->sk_reclen);
860 			goto err_delete;
861 		}
862 		svsk->sk_reclen &= 0x7fffffff;
863 		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
864 		if (svsk->sk_reclen > (bufp->buflen<<2)) {
865 			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
866 			       (unsigned long) svsk->sk_reclen);
867 			goto err_delete;
868 		}
869 	}
870 
871 	/* Check whether enough data is available */
872 	len = svc_recv_available(svsk);
873 	if (len < 0)
874 		goto error;
875 
876 	if (len < svsk->sk_reclen) {
877 		dprintk("svc: incomplete TCP record (%d of %d)\n",
878 			len, svsk->sk_reclen);
879 		svc_sock_received(svsk);
880 		return -EAGAIN;	/* record not complete */
881 	}
882 	set_bit(SK_DATA, &svsk->sk_flags);
883 
884 	/* Frob argbuf */
885 	bufp->iov[0].iov_base += 4;
886 	bufp->iov[0].iov_len  -= 4;
887 
888 	/* Now receive data */
889 	len = svc_recvfrom(rqstp, bufp->iov, bufp->nriov, svsk->sk_reclen);
890 	if (len < 0)
891 		goto error;
892 
893 	dprintk("svc: TCP complete record (%d bytes)\n", len);
894 
895 	/* Position reply write pointer immediately after
896 	 * record length */
897 	rqstp->rq_resbuf.buf += 1;
898 	rqstp->rq_resbuf.len  = 1;
899 
900 	rqstp->rq_skbuff      = 0;
901 	rqstp->rq_argbuf.buf += 1;
902 	rqstp->rq_argbuf.len  = (len >> 2);
903 	rqstp->rq_prot	      = IPPROTO_TCP;
904 
905 	/* Reset TCP read info */
906 	svsk->sk_reclen = 0;
907 	svsk->sk_tcplen = 0;
908 
909 	svc_sock_received(svsk);
910 	if (serv->sv_stats)
911 		serv->sv_stats->nettcpcnt++;
912 
913 	return len;
914 
915  err_delete:
916 	svc_delete_socket(svsk);
917 	return -EAGAIN;
918 
919  error:
920 	if (len == -EAGAIN) {
921 		dprintk("RPC: TCP recvfrom got EAGAIN\n");
922 		svc_sock_received(svsk);
923 	} else {
924 		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
925 					svsk->sk_server->sv_name, -len);
926 		svc_sock_received(svsk);
927 	}
928 
929 	return len;
930 }
931 
932 /*
933  * Send out data on TCP socket.
934  */
935 static int
svc_tcp_sendto(struct svc_rqst * rqstp)936 svc_tcp_sendto(struct svc_rqst *rqstp)
937 {
938 	struct svc_buf	*bufp = &rqstp->rq_resbuf;
939 	int sent;
940 
941 	/* Set up the first element of the reply iovec.
942 	 * Any other iovecs that may be in use have been taken
943 	 * care of by the server implementation itself.
944 	 */
945 	bufp->iov[0].iov_base = bufp->base;
946 	bufp->iov[0].iov_len  = bufp->len << 2;
947 	bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4));
948 
949 	if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags))
950 		return -ENOTCONN;
951 
952 	sent = svc_sendto(rqstp, bufp->iov, bufp->nriov);
953 	if (sent != bufp->len<<2) {
954 		printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - shutting down socket\n",
955 		       rqstp->rq_sock->sk_server->sv_name,
956 		       sent, bufp->len << 2);
957 		svc_delete_socket(rqstp->rq_sock);
958 		sent = -EAGAIN;
959 	}
960 	return sent;
961 }
962 
963 static int
svc_tcp_init(struct svc_sock * svsk)964 svc_tcp_init(struct svc_sock *svsk)
965 {
966 	struct sock	*sk = svsk->sk_sk;
967 	struct tcp_opt  *tp = &(sk->tp_pinfo.af_tcp);
968 
969 	svsk->sk_recvfrom = svc_tcp_recvfrom;
970 	svsk->sk_sendto = svc_tcp_sendto;
971 
972 	if (sk->state == TCP_LISTEN) {
973 		dprintk("setting up TCP socket for listening\n");
974 		sk->data_ready = svc_tcp_listen_data_ready;
975 	} else {
976 		dprintk("setting up TCP socket for reading\n");
977 		sk->state_change = svc_tcp_state_change;
978 		sk->data_ready = svc_tcp_data_ready;
979 		sk->write_space = svc_write_space;
980 
981 		svsk->sk_reclen = 0;
982 		svsk->sk_tcplen = 0;
983 
984 		tp->nonagle = 1;        /* disable Nagle's algorithm */
985 
986 		/* initialise setting must have enough space to
987 		 * receive and respond to one request.
988 		 * svc_tcp_recvfrom will re-adjust if necessary
989 		 */
990 		svc_sock_setbufsize(svsk->sk_sock,
991 				    3 * svsk->sk_server->sv_bufsz,
992 				    3 * svsk->sk_server->sv_bufsz);
993 
994 		set_bit(SK_CHNGBUF, &svsk->sk_flags);
995 		if (sk->state != TCP_ESTABLISHED)
996 			set_bit(SK_CLOSE, &svsk->sk_flags);
997 	}
998 
999 	return 0;
1000 }
1001 
1002 void
svc_sock_update_bufs(struct svc_serv * serv)1003 svc_sock_update_bufs(struct svc_serv *serv)
1004 {
1005 	/*
1006 	 * The number of server threads has changed.
1007 	 * flag all socket to the snd/rcv buffer sizes
1008 	 * updated.
1009 	 * We don't just do it, as the locking is rather
1010 	 * awkward at this point
1011 	 */
1012 	struct list_head *le;
1013 
1014 	spin_lock_bh(&serv->sv_lock);
1015 	list_for_each(le, &serv->sv_permsocks) {
1016 		struct svc_sock *svsk =
1017 			list_entry(le, struct svc_sock, sk_list);
1018 		set_bit(SK_CHNGBUF, &svsk->sk_flags);
1019 	}
1020 	list_for_each(le, &serv->sv_tempsocks) {
1021 		struct svc_sock *svsk =
1022 			list_entry(le, struct svc_sock, sk_list);
1023 		set_bit(SK_CHNGBUF, &svsk->sk_flags);
1024 	}
1025 	spin_unlock_bh(&serv->sv_lock);
1026 }
1027 
1028 /*
1029  * Receive the next request on any socket.
1030  */
1031 int
svc_recv(struct svc_serv * serv,struct svc_rqst * rqstp,long timeout)1032 svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
1033 {
1034 	struct svc_sock		*svsk =NULL;
1035 	int			len;
1036 	DECLARE_WAITQUEUE(wait, current);
1037 
1038 	dprintk("svc: server %p waiting for data (to = %ld)\n",
1039 		rqstp, timeout);
1040 
1041 	if (rqstp->rq_sock)
1042 		printk(KERN_ERR
1043 			"svc_recv: service %p, socket not NULL!\n",
1044 			 rqstp);
1045 	if (waitqueue_active(&rqstp->rq_wait))
1046 		printk(KERN_ERR
1047 			"svc_recv: service %p, wait queue active!\n",
1048 			 rqstp);
1049 
1050 	/* Initialize the buffers */
1051 	rqstp->rq_argbuf = rqstp->rq_defbuf;
1052 	rqstp->rq_resbuf = rqstp->rq_defbuf;
1053 
1054 	if (signalled())
1055 		return -EINTR;
1056 
1057 	spin_lock_bh(&serv->sv_lock);
1058 	if (!list_empty(&serv->sv_tempsocks)) {
1059 		svsk = list_entry(serv->sv_tempsocks.next,
1060 				  struct svc_sock, sk_list);
1061 		/* apparently the "standard" is that clients close
1062 		 * idle connections after 5 minutes, servers after
1063 		 * 6 minutes
1064 		 *   http://www.connectathon.org/talks96/nfstcp.pdf
1065 		 */
1066 		if (CURRENT_TIME - svsk->sk_lastrecv < 6*60
1067 		    || test_bit(SK_BUSY, &svsk->sk_flags))
1068 			svsk = NULL;
1069 	}
1070 	if (svsk) {
1071 		set_bit(SK_BUSY, &svsk->sk_flags);
1072 		set_bit(SK_CLOSE, &svsk->sk_flags);
1073 		rqstp->rq_sock = svsk;
1074 		svsk->sk_inuse++;
1075 	} else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
1076 		rqstp->rq_sock = svsk;
1077 		svsk->sk_inuse++;
1078 		rqstp->rq_reserved = serv->sv_bufsz;
1079 		svsk->sk_reserved += rqstp->rq_reserved;
1080 	} else {
1081 		/* No data pending. Go to sleep */
1082 		svc_serv_enqueue(serv, rqstp);
1083 
1084 		/*
1085 		 * We have to be able to interrupt this wait
1086 		 * to bring down the daemons ...
1087 		 */
1088 		set_current_state(TASK_INTERRUPTIBLE);
1089 		add_wait_queue(&rqstp->rq_wait, &wait);
1090 		spin_unlock_bh(&serv->sv_lock);
1091 
1092 		schedule_timeout(timeout);
1093 
1094 		spin_lock_bh(&serv->sv_lock);
1095 		remove_wait_queue(&rqstp->rq_wait, &wait);
1096 
1097 		if (!(svsk = rqstp->rq_sock)) {
1098 			svc_serv_dequeue(serv, rqstp);
1099 			spin_unlock_bh(&serv->sv_lock);
1100 			dprintk("svc: server %p, no data yet\n", rqstp);
1101 			return signalled()? -EINTR : -EAGAIN;
1102 		}
1103 	}
1104 	spin_unlock_bh(&serv->sv_lock);
1105 
1106 	dprintk("svc: server %p, socket %p, inuse=%d\n",
1107 		 rqstp, svsk, svsk->sk_inuse);
1108 	len = svsk->sk_recvfrom(rqstp);
1109 	dprintk("svc: got len=%d\n", len);
1110 
1111 	/* No data, incomplete (TCP) read, or accept() */
1112 	if (len == 0 || len == -EAGAIN) {
1113 		svc_sock_release(rqstp);
1114 		return -EAGAIN;
1115 	}
1116 	svsk->sk_lastrecv = CURRENT_TIME;
1117 	if (test_bit(SK_TEMP, &svsk->sk_flags)) {
1118 		/* push active sockets to end of list */
1119 		spin_lock_bh(&serv->sv_lock);
1120 		list_del(&svsk->sk_list);
1121 		list_add_tail(&svsk->sk_list, &serv->sv_tempsocks);
1122 		spin_unlock_bh(&serv->sv_lock);
1123 	}
1124 
1125 	rqstp->rq_secure  = ntohs(rqstp->rq_addr.sin_port) < 1024;
1126 	rqstp->rq_userset = 0;
1127 	rqstp->rq_verfed  = 0;
1128 
1129 	svc_getlong(&rqstp->rq_argbuf, rqstp->rq_xid);
1130 	svc_putlong(&rqstp->rq_resbuf, rqstp->rq_xid);
1131 
1132 	/* Assume that the reply consists of a single buffer. */
1133 	rqstp->rq_resbuf.nriov = 1;
1134 
1135 	if (serv->sv_stats)
1136 		serv->sv_stats->netcnt++;
1137 	return len;
1138 }
1139 
1140 /*
1141  * Drop request
1142  */
1143 void
svc_drop(struct svc_rqst * rqstp)1144 svc_drop(struct svc_rqst *rqstp)
1145 {
1146 	dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
1147 	svc_sock_release(rqstp);
1148 }
1149 
/*
 * Return reply to client.
 *
 * Releases the receive skb, transmits through the socket's sk_sendto
 * method and then releases the socket.
 *
 * Returns -EFAULT if @rqstp has no socket attached, 0 if sk_sendto
 * failed with -ECONNREFUSED, -ENOTCONN or -EAGAIN, and otherwise the
 * value sk_sendto returned.
 */
int
svc_send(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk = rqstp->rq_sock;
	int		len;

	if (svsk == NULL) {
		printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
				__FILE__, __LINE__);
		return -EFAULT;
	}

	/* release the receive skb before sending the reply */
	svc_release_skb(rqstp);

	len = svsk->sk_sendto(rqstp);
	svc_sock_release(rqstp);

	/* these three errors are not passed on to the caller */
	switch (len) {
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -EAGAIN:
		return 0;
	default:
		return len;
	}
}
1175 
1176 /*
1177  * Initialize socket for RPC use and create svc_sock struct
1178  * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1179  */
1180 static struct svc_sock *
svc_setup_socket(struct svc_serv * serv,struct socket * sock,int * errp,int pmap_register)1181 svc_setup_socket(struct svc_serv *serv, struct socket *sock,
1182 					int *errp, int pmap_register)
1183 {
1184 	struct svc_sock	*svsk;
1185 	struct sock	*inet;
1186 
1187 	dprintk("svc: svc_setup_socket %p\n", sock);
1188 	if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) {
1189 		*errp = -ENOMEM;
1190 		return NULL;
1191 	}
1192 	memset(svsk, 0, sizeof(*svsk));
1193 
1194 	inet = sock->sk;
1195 	inet->user_data = svsk;
1196 	svsk->sk_sock = sock;
1197 	svsk->sk_sk = inet;
1198 	svsk->sk_ostate = inet->state_change;
1199 	svsk->sk_odata = inet->data_ready;
1200 	svsk->sk_owspace = inet->write_space;
1201 	svsk->sk_server = serv;
1202 	svsk->sk_lastrecv = CURRENT_TIME;
1203 
1204 	/* Initialize the socket */
1205 	if (sock->type == SOCK_DGRAM)
1206 		*errp = svc_udp_init(svsk);
1207 	else
1208 		*errp = svc_tcp_init(svsk);
1209 if (svsk->sk_sk == NULL)
1210 	printk(KERN_WARNING "svsk->sk_sk == NULL after svc_prot_init!\n");
1211 
1212 	/* Register socket with portmapper */
1213 	if (*errp >= 0 && pmap_register)
1214 		*errp = svc_register(serv, inet->protocol, ntohs(inet->sport));
1215 
1216 	if (*errp < 0) {
1217 		inet->user_data = NULL;
1218 		kfree(svsk);
1219 		return NULL;
1220 	}
1221 
1222 
1223 	spin_lock_bh(&serv->sv_lock);
1224 	if (!pmap_register) {
1225 		set_bit(SK_TEMP, &svsk->sk_flags);
1226 		list_add(&svsk->sk_list, &serv->sv_tempsocks);
1227 		serv->sv_tmpcnt++;
1228 	} else {
1229 		clear_bit(SK_TEMP, &svsk->sk_flags);
1230 		list_add(&svsk->sk_list, &serv->sv_permsocks);
1231 	}
1232 	spin_unlock_bh(&serv->sv_lock);
1233 
1234 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1235 				svsk, svsk->sk_sk);
1236 	return svsk;
1237 }
1238 
1239 /*
1240  * Create socket for RPC service.
1241  */
1242 static int
svc_create_socket(struct svc_serv * serv,int protocol,struct sockaddr_in * sin)1243 svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
1244 {
1245 	struct svc_sock	*svsk;
1246 	struct socket	*sock;
1247 	int		error;
1248 	int		type;
1249 
1250 	dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
1251 				serv->sv_program->pg_name, protocol,
1252 				NIPQUAD(sin->sin_addr.s_addr),
1253 				ntohs(sin->sin_port));
1254 
1255 	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
1256 		printk(KERN_WARNING "svc: only UDP and TCP "
1257 				"sockets supported\n");
1258 		return -EINVAL;
1259 	}
1260 	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1261 
1262 	if ((error = sock_create(PF_INET, type, protocol, &sock)) < 0)
1263 		return error;
1264 
1265 	if (sin != NULL) {
1266 		if (type == SOCK_STREAM)
1267 			sock->sk->reuse = 1; /* allow address reuse */
1268 		error = sock->ops->bind(sock, (struct sockaddr *) sin,
1269 						sizeof(*sin));
1270 		if (error < 0)
1271 			goto bummer;
1272 	}
1273 
1274 	if (protocol == IPPROTO_TCP) {
1275 		if ((error = sock->ops->listen(sock, 64)) < 0)
1276 			goto bummer;
1277 	}
1278 
1279 	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
1280 		return 0;
1281 
1282 bummer:
1283 	dprintk("svc: svc_create_socket error = %d\n", -error);
1284 	sock_release(sock);
1285 	return error;
1286 }
1287 
1288 /*
1289  * Remove a dead socket
1290  */
1291 void
svc_delete_socket(struct svc_sock * svsk)1292 svc_delete_socket(struct svc_sock *svsk)
1293 {
1294 	struct svc_serv	*serv;
1295 	struct sock	*sk;
1296 
1297 	dprintk("svc: svc_delete_socket(%p)\n", svsk);
1298 
1299 	if (test_and_set_bit(SK_DEAD, &svsk->sk_flags))
1300 		return ;
1301 
1302 	serv = svsk->sk_server;
1303 	sk = svsk->sk_sk;
1304 
1305 	sk->state_change = svsk->sk_ostate;
1306 	sk->data_ready = svsk->sk_odata;
1307 	sk->write_space = svsk->sk_owspace;
1308 
1309 	spin_lock_bh(&serv->sv_lock);
1310 
1311 	list_del(&svsk->sk_list);
1312 	if (test_bit(SK_TEMP, &svsk->sk_flags))
1313 		serv->sv_tmpcnt--;
1314 	if (test_bit(SK_QUED, &svsk->sk_flags))
1315 		list_del(&svsk->sk_ready);
1316 
1317 
1318 	if (!svsk->sk_inuse) {
1319 		spin_unlock_bh(&serv->sv_lock);
1320 		sock_release(svsk->sk_sock);
1321 		kfree(svsk);
1322 	} else {
1323 		spin_unlock_bh(&serv->sv_lock);
1324 		dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
1325 		/* svsk->sk_server = NULL; */
1326 	}
1327 }
1328 
1329 /*
1330  * Make a socket for nfsd and lockd
1331  */
1332 int
svc_makesock(struct svc_serv * serv,int protocol,unsigned short port)1333 svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
1334 {
1335 	struct sockaddr_in	sin;
1336 
1337 	dprintk("svc: creating socket proto = %d\n", protocol);
1338 	sin.sin_family      = AF_INET;
1339 	sin.sin_addr.s_addr = INADDR_ANY;
1340 	sin.sin_port        = htons(port);
1341 	return svc_create_socket(serv, protocol, &sin);
1342 }
1343 
1344