1 /*
2  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the BSD-type
8  * license below:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  *      Redistributions of source code must retain the above copyright
15  *      notice, this list of conditions and the following disclaimer.
16  *
17  *      Redistributions in binary form must reproduce the above
18  *      copyright notice, this list of conditions and the following
19  *      disclaimer in the documentation and/or other materials provided
20  *      with the distribution.
21  *
22  *      Neither the name of the Network Appliance, Inc. nor the names of
23  *      its contributors may be used to endorse or promote products
24  *      derived from this software without specific prior written
25  *      permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  *
39  * Author: Tom Tucker <tom@opengridcomputing.com>
40  */
41 
42 #include <linux/sunrpc/xdr.h>
43 #include <linux/sunrpc/debug.h>
44 #include <asm/unaligned.h>
45 #include <linux/sunrpc/rpc_rdma.h>
46 #include <linux/sunrpc/svc_rdma.h>
47 
48 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
49 
50 /*
51  * Decodes a read chunk list. The expected format is as follows:
52  *    descrim  : xdr_one
53  *    position : u32 offset into XDR stream
54  *    handle   : u32 RKEY
55  *    . . .
56  *  end-of-list: xdr_zero
57  */
decode_read_list(u32 * va,u32 * vaend)58 static u32 *decode_read_list(u32 *va, u32 *vaend)
59 {
60 	struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61 
62 	while (ch->rc_discrim != xdr_zero) {
63 		if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
64 		    (unsigned long)vaend) {
65 			dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
66 			return NULL;
67 		}
68 		ch++;
69 	}
70 	return (u32 *)&ch->rc_position;
71 }
72 
73 /*
74  * Determine number of chunks and total bytes in chunk list. The chunk
75  * list has already been verified to fit within the RPCRDMA header.
76  */
svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk * ch,int * ch_count,int * byte_count)77 void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
78 			       int *ch_count, int *byte_count)
79 {
80 	/* compute the number of bytes represented by read chunks */
81 	*byte_count = 0;
82 	*ch_count = 0;
83 	for (; ch->rc_discrim != 0; ch++) {
84 		*byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
85 		*ch_count = *ch_count + 1;
86 	}
87 }
88 
89 /*
90  * Decodes a write chunk list. The expected format is as follows:
91  *    descrim  : xdr_one
92  *    nchunks  : <count>
93  *       handle   : u32 RKEY              ---+
94  *       length   : u32 <len of segment>     |
95  *       offset   : remove va                + <count>
96  *       . . .                               |
97  *                                        ---+
98  */
decode_write_list(u32 * va,u32 * vaend)99 static u32 *decode_write_list(u32 *va, u32 *vaend)
100 {
101 	int nchunks;
102 
103 	struct rpcrdma_write_array *ary =
104 		(struct rpcrdma_write_array *)va;
105 
106 	/* Check for not write-array */
107 	if (ary->wc_discrim == xdr_zero)
108 		return (u32 *)&ary->wc_nchunks;
109 
110 	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
111 	    (unsigned long)vaend) {
112 		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
113 		return NULL;
114 	}
115 	nchunks = ntohl(ary->wc_nchunks);
116 	if (((unsigned long)&ary->wc_array[0] +
117 	     (sizeof(struct rpcrdma_write_chunk) * nchunks)) >
118 	    (unsigned long)vaend) {
119 		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
120 			ary, nchunks, vaend);
121 		return NULL;
122 	}
123 	/*
124 	 * rs_length is the 2nd 4B field in wc_target and taking its
125 	 * address skips the list terminator
126 	 */
127 	return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
128 }
129 
/*
 * Decodes a reply chunk array: a discriminator, a chunk count and then
 * that many write chunks (same layout as struct rpcrdma_write_array).
 * Unlike the write list, no terminator word is skipped after the chunks.
 *
 * @va:    first word of the reply array
 * @vaend: first byte past the data that may be examined
 *
 * Returns a pointer to the word following the reply array, or NULL if
 * the array is malformed or does not fit below @vaend.
 */
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
	struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va;
	unsigned long end = (unsigned long)vaend;
	unsigned long start;
	int nchunks;

	/* The discriminator must be inside the buffer before it is read */
	if ((unsigned long)&ary->wc_discrim + sizeof(ary->wc_discrim) > end) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}

	/* Check for no reply-array */
	if (ary->wc_discrim == xdr_zero)
		return (u32 *)&ary->wc_nchunks;

	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > end) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}

	/*
	 * nchunks comes straight off the wire.  Compare it against the
	 * number of chunks that can fit in the remaining space instead of
	 * multiplying it out, so that a negative or very large count cannot
	 * wrap the arithmetic and slip past the check.
	 */
	nchunks = ntohl(ary->wc_nchunks);
	start = (unsigned long)&ary->wc_array[0];
	if (nchunks < 0 || start > end ||
	    (unsigned long)nchunks >
	    (end - start) / sizeof(struct rpcrdma_write_chunk)) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, nchunks, vaend);
		return NULL;
	}
	return (u32 *)&ary->wc_array[nchunks];
}
155 
svc_rdma_xdr_decode_req(struct rpcrdma_msg ** rdma_req,struct svc_rqst * rqstp)156 int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
157 			    struct svc_rqst *rqstp)
158 {
159 	struct rpcrdma_msg *rmsgp = NULL;
160 	u32 *va;
161 	u32 *vaend;
162 	u32 hdr_len;
163 
164 	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
165 
166 	/* Verify that there's enough bytes for header + something */
167 	if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
168 		dprintk("svcrdma: header too short = %d\n",
169 			rqstp->rq_arg.len);
170 		return -EINVAL;
171 	}
172 
173 	/* Decode the header */
174 	rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
175 	rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
176 	rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
177 	rmsgp->rm_type = ntohl(rmsgp->rm_type);
178 
179 	if (rmsgp->rm_vers != RPCRDMA_VERSION)
180 		return -ENOSYS;
181 
182 	/* Pull in the extra for the padded case and bump our pointer */
183 	if (rmsgp->rm_type == RDMA_MSGP) {
184 		int hdrlen;
185 		rmsgp->rm_body.rm_padded.rm_align =
186 			ntohl(rmsgp->rm_body.rm_padded.rm_align);
187 		rmsgp->rm_body.rm_padded.rm_thresh =
188 			ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
189 
190 		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
191 		rqstp->rq_arg.head[0].iov_base = va;
192 		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
193 		rqstp->rq_arg.head[0].iov_len -= hdrlen;
194 		if (hdrlen > rqstp->rq_arg.len)
195 			return -EINVAL;
196 		return hdrlen;
197 	}
198 
199 	/* The chunk list may contain either a read chunk list or a write
200 	 * chunk list and a reply chunk list.
201 	 */
202 	va = &rmsgp->rm_body.rm_chunks[0];
203 	vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
204 	va = decode_read_list(va, vaend);
205 	if (!va)
206 		return -EINVAL;
207 	va = decode_write_list(va, vaend);
208 	if (!va)
209 		return -EINVAL;
210 	va = decode_reply_array(va, vaend);
211 	if (!va)
212 		return -EINVAL;
213 
214 	rqstp->rq_arg.head[0].iov_base = va;
215 	hdr_len = (unsigned long)va - (unsigned long)rmsgp;
216 	rqstp->rq_arg.head[0].iov_len -= hdr_len;
217 
218 	*rdma_req = rmsgp;
219 	return hdr_len;
220 }
221 
svc_rdma_xdr_decode_deferred_req(struct svc_rqst * rqstp)222 int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
223 {
224 	struct rpcrdma_msg *rmsgp = NULL;
225 	struct rpcrdma_read_chunk *ch;
226 	struct rpcrdma_write_array *ary;
227 	u32 *va;
228 	u32 hdrlen;
229 
230 	dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
231 		rqstp);
232 	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
233 
234 	/* Pull in the extra for the padded case and bump our pointer */
235 	if (rmsgp->rm_type == RDMA_MSGP) {
236 		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
237 		rqstp->rq_arg.head[0].iov_base = va;
238 		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
239 		rqstp->rq_arg.head[0].iov_len -= hdrlen;
240 		return hdrlen;
241 	}
242 
243 	/*
244 	 * Skip all chunks to find RPC msg. These were previously processed
245 	 */
246 	va = &rmsgp->rm_body.rm_chunks[0];
247 
248 	/* Skip read-list */
249 	for (ch = (struct rpcrdma_read_chunk *)va;
250 	     ch->rc_discrim != xdr_zero; ch++);
251 	va = (u32 *)&ch->rc_position;
252 
253 	/* Skip write-list */
254 	ary = (struct rpcrdma_write_array *)va;
255 	if (ary->wc_discrim == xdr_zero)
256 		va = (u32 *)&ary->wc_nchunks;
257 	else
258 		/*
259 		 * rs_length is the 2nd 4B field in wc_target and taking its
260 		 * address skips the list terminator
261 		 */
262 		va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
263 
264 	/* Skip reply-array */
265 	ary = (struct rpcrdma_write_array *)va;
266 	if (ary->wc_discrim == xdr_zero)
267 		va = (u32 *)&ary->wc_nchunks;
268 	else
269 		va = (u32 *)&ary->wc_array[ary->wc_nchunks];
270 
271 	rqstp->rq_arg.head[0].iov_base = va;
272 	hdrlen = (unsigned long)va - (unsigned long)rmsgp;
273 	rqstp->rq_arg.head[0].iov_len -= hdrlen;
274 
275 	return hdrlen;
276 }
277 
/*
 * Encode an RDMA_ERROR reply header into the buffer at @va.
 *
 * @xprt:  transport; sc_max_requests is written as the credit value
 * @rmsgp: request header supplying the xid and version to echo back
 * @err:   error code to report
 * @va:    output buffer; five words are written, or seven for ERR_VERS
 *
 * All words are stored in network byte order.  For ERR_VERS two extra
 * words, both RPCRDMA_VERSION, are appended (presumably the lowest and
 * highest supported versions - confirm against the protocol spec).
 *
 * Returns the number of bytes written.
 */
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
			      struct rpcrdma_msg *rmsgp,
			      enum rpcrdma_errcode err, u32 *va)
{
	int nwords = 0;

	va[nwords++] = htonl(rmsgp->rm_xid);
	va[nwords++] = htonl(rmsgp->rm_vers);
	va[nwords++] = htonl(xprt->sc_max_requests);
	va[nwords++] = htonl(RDMA_ERROR);
	va[nwords++] = htonl(err);

	if (err == ERR_VERS) {
		va[nwords++] = htonl(RPCRDMA_VERSION);
		va[nwords++] = htonl(RPCRDMA_VERSION);
	}

	return nwords * (int)sizeof(*va);
}
296 
svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg * rmsgp)297 int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
298 {
299 	struct rpcrdma_write_array *wr_ary;
300 
301 	/* There is no read-list in a reply */
302 
303 	/* skip write list */
304 	wr_ary = (struct rpcrdma_write_array *)
305 		&rmsgp->rm_body.rm_chunks[1];
306 	if (wr_ary->wc_discrim)
307 		wr_ary = (struct rpcrdma_write_array *)
308 			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
309 			wc_target.rs_length;
310 	else
311 		wr_ary = (struct rpcrdma_write_array *)
312 			&wr_ary->wc_nchunks;
313 
314 	/* skip reply array */
315 	if (wr_ary->wc_discrim)
316 		wr_ary = (struct rpcrdma_write_array *)
317 			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
318 	else
319 		wr_ary = (struct rpcrdma_write_array *)
320 			&wr_ary->wc_nchunks;
321 
322 	return (unsigned long) wr_ary - (unsigned long) rmsgp;
323 }
324 
svc_rdma_xdr_encode_write_list(struct rpcrdma_msg * rmsgp,int chunks)325 void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
326 {
327 	struct rpcrdma_write_array *ary;
328 
329 	/* no read-list */
330 	rmsgp->rm_body.rm_chunks[0] = xdr_zero;
331 
332 	/* write-array discrim */
333 	ary = (struct rpcrdma_write_array *)
334 		&rmsgp->rm_body.rm_chunks[1];
335 	ary->wc_discrim = xdr_one;
336 	ary->wc_nchunks = htonl(chunks);
337 
338 	/* write-list terminator */
339 	ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
340 
341 	/* reply-array discriminator */
342 	ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
343 }
344 
svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array * ary,int chunks)345 void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
346 				 int chunks)
347 {
348 	ary->wc_discrim = xdr_one;
349 	ary->wc_nchunks = htonl(chunks);
350 }
351 
/*
 * Fill in the segment of one entry in a write or reply array.
 *
 * @ary:       array containing the entry
 * @chunk_no:  index of the entry within wc_array
 * @rs_handle: segment handle, stored as given
 * @rs_offset: segment offset, stored as given
 * @write_len: segment length in host byte order; converted with
 *             htonl() before being stored
 */
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
				     int chunk_no,
				     __be32 rs_handle,
				     __be64 rs_offset,
				     u32 write_len)
{
	struct rpcrdma_write_chunk *chunk = &ary->wc_array[chunk_no];

	chunk->wc_target.rs_handle = rs_handle;
	chunk->wc_target.rs_offset = rs_offset;
	chunk->wc_target.rs_length = htonl(write_len);
}
363 
svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma * xprt,struct rpcrdma_msg * rdma_argp,struct rpcrdma_msg * rdma_resp,enum rpcrdma_proc rdma_type)364 void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
365 				  struct rpcrdma_msg *rdma_argp,
366 				  struct rpcrdma_msg *rdma_resp,
367 				  enum rpcrdma_proc rdma_type)
368 {
369 	rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
370 	rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
371 	rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
372 	rdma_resp->rm_type = htonl(rdma_type);
373 
374 	/* Encode <nul> chunks lists */
375 	rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
376 	rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
377 	rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
378 }
379