/*
 *  pNFS Objects layout driver high level definitions
 *
 *  Copyright (C) 2007 Panasas Inc. [year of first publication]
 *  All rights reserved.
 *
 *  Benny Halevy <bhalevy@panasas.com>
 *  Boaz Harrosh <bharrosh@panasas.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2
 *  See the file COPYING included with this distribution for more details.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. Neither the name of the Panasas company nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kmod.h>
#include <linux/moduleparam.h>
#include <linux/ratelimit.h>
#include <scsi/osd_initiator.h>
#include "objlayout.h"

#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
/*
 * Create an objlayout layout structure for the given inode and return it.
 */
struct pnfs_layout_hdr *
objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct objlayout *objlay;

	objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
	if (objlay) {
		spin_lock_init(&objlay->lock);
		INIT_LIST_HEAD(&objlay->err_list);
	}
	dprintk("%s: Return %p\n", __func__, objlay);
	return &objlay->pnfs_layout;
}

/*
 * Free an objlayout layout structure
 */
void
objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct objlayout *objlay = OBJLAYOUT(lo);

	dprintk("%s: objlay %p\n", __func__, objlay);

	WARN_ON(!list_empty(&objlay->err_list));
	kfree(objlay);
}

/*
 * Unmarshall layout and store it in pnfslay.
 */
struct pnfs_layout_segment *
objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
		     struct nfs4_layoutget_res *lgr,
		     gfp_t gfp_flags)
{
	int status = -ENOMEM;
	struct xdr_stream stream;
	struct xdr_buf buf = {
		.pages = lgr->layoutp->pages,
		.page_len = lgr->layoutp->len,
		.buflen = lgr->layoutp->len,
		.len = lgr->layoutp->len,
	};
	struct page *scratch;
	struct pnfs_layout_segment *lseg;

	dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);

	scratch = alloc_page(gfp_flags);
	if (!scratch)
		goto err_nofree;

	xdr_init_decode(&stream, &buf, NULL);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
	if (unlikely(status)) {
		dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
			status);
		goto err;
	}

	__free_page(scratch);

	dprintk("%s: Return %p\n", __func__, lseg);
	return lseg;

err:
	__free_page(scratch);
err_nofree:
	dprintk("%s: Err Return=>%d\n", __func__, status);
	return ERR_PTR(status);
}

/*
 * Free a layout segment
 */
void
objlayout_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s: freeing layout segment %p\n", __func__, lseg);

	if (unlikely(!lseg))
		return;

	objio_free_lseg(lseg);
}

/*
 * I/O Operations
 */
static inline u64
end_offset(u64 start, u64 len)
{
	u64 end;

	end = start + len;
	return end >= start ? end : NFS4_MAX_UINT64;
}

/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
{
	u64 end;

	BUG_ON(!len);
	end = start + len;
	return end > start ? end - 1 : NFS4_MAX_UINT64;
}

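/*
 * Sanity-check that the I/O request falls within the layout segment's
 * byte range, and normalize the page vector: if pgbase spans whole
 * pages, advance the page pointer and keep only the in-page offset.
 */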
static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
			   struct page ***p_pages, unsigned *p_pgbase,
			   u64 offset, unsigned long count)
{
	u64 lseg_end_offset;

	BUG_ON(offset < lseg->pls_range.offset);
	lseg_end_offset = end_offset(lseg->pls_range.offset,
				     lseg->pls_range.length);
	BUG_ON(offset >= lseg_end_offset);
	WARN_ON(offset + count > lseg_end_offset);

	if (*p_pgbase > PAGE_SIZE) {
		dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
		*p_pages += *p_pgbase >> PAGE_SHIFT;
		*p_pgbase &= ~PAGE_MASK;
	}
}

/*
 * I/O done common code
 */
static void
objlayout_iodone(struct objlayout_io_res *oir)
{
	if (likely(oir->status >= 0)) {
		objio_free_result(oir);
	} else {
		struct objlayout *objlay = oir->objlay;

		spin_lock(&objlay->lock);
		objlay->delta_space_valid = OBJ_DSU_INVALID;
		list_add(&oir->err_list, &objlay->err_list);
		spin_unlock(&objlay->lock);
	}
}

/*
 * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
 *
 * The @index component IO failed (error returned from target). Register
 * the error for later reporting at layout-return.
 */
void
objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
			struct pnfs_osd_objid *pooid, int osd_error,
			u64 offset, u64 length, bool is_write)
{
	struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];

	BUG_ON(index >= oir->num_comps);
	if (osd_error) {
		ioerr->oer_component = *pooid;
		ioerr->oer_comp_offset = offset;
		ioerr->oer_comp_length = length;
		ioerr->oer_iswrite = is_write;
		ioerr->oer_errno = osd_error;

		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
			__func__, index, ioerr->oer_errno,
			ioerr->oer_iswrite,
			_DEVID_LO(&ioerr->oer_component.oid_device_id),
			_DEVID_HI(&ioerr->oer_component.oid_device_id),
			ioerr->oer_component.oid_partition_id,
			ioerr->oer_component.oid_object_id,
			ioerr->oer_comp_offset,
			ioerr->oer_comp_length);
	} else {
		/* User need not call if no error is reported */
		ioerr->oer_errno = 0;
	}
}

/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
 * This is because the osd completion is called with ints-off from
 * the block layer
 */
static void _rpc_read_complete(struct work_struct *work)
{
	struct rpc_task *task;
	struct nfs_read_data *rdata;

	dprintk("%s enter\n", __func__);
	task = container_of(work, struct rpc_task, u.tk_work);
	rdata = container_of(task, struct nfs_read_data, task);

	pnfs_ld_read_done(rdata);
}

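/*
 * Called by the io engine when a read I/O is done. Records the result
 * (or error) and completes the nfs read either directly (sync) or via
 * the rpc workqueue (async), since the osd completion may run with
 * interrupts off.
 */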
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
	struct nfs_read_data *rdata = oir->rpcdata;

	oir->status = rdata->task.tk_status = status;
	if (status >= 0)
		rdata->res.count = status;
	else
		rdata->pnfs_error = status;
	objlayout_iodone(oir);
	/* must not use oir after this point */

	dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
		status, rdata->res.eof, sync);

	if (sync)
		pnfs_ld_read_done(rdata);
	else {
		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
		schedule_work(&rdata->task.u.tk_work);
	}
}

/*
 * Perform sync or async reads.
 */
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_read_data *rdata)
{
	loff_t offset = rdata->args.offset;
	size_t count = rdata->args.count;
	int err;
	loff_t eof;

	eof = i_size_read(rdata->inode);
	if (unlikely(offset + count > eof)) {
		if (offset >= eof) {
			err = 0;
			rdata->res.count = 0;
			rdata->res.eof = 1;
			/*FIXME: do we need to call pnfs_ld_read_done() */
			goto out;
		}
		count = eof - offset;
	}

	rdata->res.eof = (offset + count) >= eof;
	_fix_verify_io_params(rdata->lseg, &rdata->args.pages,
			      &rdata->args.pgbase,
			      rdata->args.offset, rdata->args.count);

	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
		__func__, rdata->inode->i_ino, offset, count, rdata->res.eof);

	err = objio_read_pagelist(rdata);
 out:
	if (unlikely(err)) {
		rdata->pnfs_error = err;
		dprintk("%s: Returned Error %d\n", __func__, err);
		return PNFS_NOT_ATTEMPTED;
	}
	return PNFS_ATTEMPTED;
}

/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
 * This is because the osd completion is called with ints-off from
 * the block layer
 */
static void _rpc_write_complete(struct work_struct *work)
{
	struct rpc_task *task;
	struct nfs_write_data *wdata;

	dprintk("%s enter\n", __func__);
	task = container_of(work, struct rpc_task, u.tk_work);
	wdata = container_of(task, struct nfs_write_data, task);

	pnfs_ld_write_done(wdata);
}

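/*
 * Called by the io engine when a write I/O is done. Records the result
 * and commit level (or error) and completes the nfs write either
 * directly (sync) or via the rpc workqueue (async).
 */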
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
	struct nfs_write_data *wdata = oir->rpcdata;

	oir->status = wdata->task.tk_status = status;
	if (status >= 0) {
		wdata->res.count = status;
		wdata->verf.committed = oir->committed;
	} else {
		wdata->pnfs_error = status;
	}
	objlayout_iodone(oir);
	/* must not use oir after this point */

	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
		status, wdata->verf.committed, sync);

	if (sync)
		pnfs_ld_write_done(wdata);
	else {
		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
		schedule_work(&wdata->task.u.tk_work);
	}
}

/*
 * Perform sync or async writes.
 */
enum pnfs_try_status
objlayout_write_pagelist(struct nfs_write_data *wdata,
			 int how)
{
	int err;

	_fix_verify_io_params(wdata->lseg, &wdata->args.pages,
			      &wdata->args.pgbase,
			      wdata->args.offset, wdata->args.count);

	err = objio_write_pagelist(wdata, how);
	if (unlikely(err)) {
		wdata->pnfs_error = err;
		dprintk("%s: Returned Error %d\n", __func__, err);
		return PNFS_NOT_ATTEMPTED;
	}
	return PNFS_ATTEMPTED;
}

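/*
 * Encode the objects-layout LAYOUTCOMMIT payload: the delta of space
 * used since the last commit and a flag telling the server whether
 * I/O errors are pending for LAYOUTRETURN.
 */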
void
objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutcommit_args *args)
{
	struct objlayout *objlay = OBJLAYOUT(pnfslay);
	struct pnfs_osd_layoutupdate lou;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);

	spin_lock(&objlay->lock);
	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
	lou.dsu_delta = objlay->delta_space_used;
	objlay->delta_space_used = 0;
	objlay->delta_space_valid = OBJ_DSU_INIT;
	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
	spin_unlock(&objlay->lock);

	start = xdr_reserve_space(xdr, 4);

	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));

	*start = cpu_to_be32((xdr->p - start - 1) * 4);

	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
		lou.dsu_delta, lou.olu_ioerr_flag);
}

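/*
 * Relative severity of an osd error, used to pick which errno to keep
 * when multiple component errors are merged into one.
 */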
static int
err_prio(u32 oer_errno)
{
	switch (oer_errno) {
	case 0:
		return 0;

	case PNFS_OSD_ERR_RESOURCE:
		return OSD_ERR_PRI_RESOURCE;
	case PNFS_OSD_ERR_BAD_CRED:
		return OSD_ERR_PRI_BAD_CRED;
	case PNFS_OSD_ERR_NO_ACCESS:
		return OSD_ERR_PRI_NO_ACCESS;
	case PNFS_OSD_ERR_UNREACHABLE:
		return OSD_ERR_PRI_UNREACHABLE;
	case PNFS_OSD_ERR_NOT_FOUND:
		return OSD_ERR_PRI_NOT_FOUND;
	case PNFS_OSD_ERR_NO_SPACE:
		return OSD_ERR_PRI_NO_SPACE;
	default:
		WARN_ON(1);
		/* fallthrough */
	case PNFS_OSD_ERR_EIO:
		return OSD_ERR_PRI_EIO;
	}
}

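/*
 * Fold @src_err into @dest_err: widen the byte range to cover both
 * errors, blank out component ids that differ, and keep the higher
 * priority errno (write errors take precedence over read errors).
 */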
static void
merge_ioerr(struct pnfs_osd_ioerr *dest_err,
	    const struct pnfs_osd_ioerr *src_err)
{
	u64 dest_end, src_end;

	if (!dest_err->oer_errno) {
		*dest_err = *src_err;
		/* accumulated device must be blank */
		memset(&dest_err->oer_component.oid_device_id, 0,
			sizeof(dest_err->oer_component.oid_device_id));

		return;
	}

	if (dest_err->oer_component.oid_partition_id !=
				src_err->oer_component.oid_partition_id)
		dest_err->oer_component.oid_partition_id = 0;

	if (dest_err->oer_component.oid_object_id !=
				src_err->oer_component.oid_object_id)
		dest_err->oer_component.oid_object_id = 0;

	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
		dest_err->oer_comp_offset = src_err->oer_comp_offset;

	dest_end = end_offset(dest_err->oer_comp_offset,
			      dest_err->oer_comp_length);
	src_end = end_offset(src_err->oer_comp_offset,
			     src_err->oer_comp_length);
	if (dest_end < src_end)
		dest_end = src_end;

	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;

	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
		dest_err->oer_errno = src_err->oer_errno;
	} else if (src_err->oer_iswrite) {
		dest_err->oer_iswrite = true;
		dest_err->oer_errno = src_err->oer_errno;
	}
}

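/*
 * When there is no room to encode every pending error descriptor,
 * merge all remaining errors into a single descriptor and encode it
 * at @p, freeing the consumed results as we go.
 */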
static void
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
{
	struct objlayout_io_res *oir, *tmp;
	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		unsigned i;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			if (!ioerr->oer_errno)
				continue;

			printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
				"is_write=%d dev(%llx:%llx) par=0x%llx "
				"obj=0x%llx offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			merge_ioerr(&accumulated_err, ioerr);
		}
		list_del(&oir->err_list);
		objio_free_result(oir);
	}

	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
}

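/*
 * Encode the objects-layout LAYOUTRETURN payload: the list of per
 * component I/O errors accumulated since the layout was handed out.
 * If the xdr buffer runs out of space, the remaining errors are folded
 * into the last encoded slot via encode_accumulated_error().
 */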
void
objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutreturn_args *args)
{
	struct objlayout *objlay = OBJLAYOUT(pnfslay);
	struct objlayout_io_res *oir, *tmp;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);
	start = xdr_reserve_space(xdr, 4);
	BUG_ON(!start);

	spin_lock(&objlay->lock);

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		__be32 *last_xdr = NULL, *p;
		unsigned i;
		int res = 0;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			if (!ioerr->oer_errno)
				continue;

			dprintk("%s: err[%d]: errno=%d is_write=%d "
				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
				"offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
			if (unlikely(!p)) {
				res = -E2BIG;
				break; /* accumulated_error */
			}

			last_xdr = p;
			pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
		}

		/* TODO: use xdr_write_pages */
		if (unlikely(res)) {
			/* no space for even one error descriptor */
			BUG_ON(!last_xdr);

			/* we've encountered a situation with lots and lots of
			 * errors and no space to encode them all. Use the last
			 * available slot to report the union of all the
			 * remaining errors.
			 */
			encode_accumulated_error(objlay, last_xdr);
			goto loop_done;
		}
		list_del(&oir->err_list);
		objio_free_result(oir);
	}
loop_done:
	spin_unlock(&objlay->lock);

	*start = cpu_to_be32((xdr->p - start - 1) * 4);
	dprintk("%s: Return\n", __func__);
}


/*
 * Get Device Info API for io engines
 */
struct objlayout_deviceinfo {
	struct page *page;
	struct pnfs_osd_deviceaddr da; /* This must be last */
};

/* Initialize and call nfs_getdeviceinfo, then decode and return a
 * "struct pnfs_osd_deviceaddr *". Eventually objlayout_put_deviceinfo()
 * should be called.
 */
int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
	struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
	gfp_t gfp_flags)
{
	struct objlayout_deviceinfo *odi;
	struct pnfs_device pd;
	struct page *page, **pages;
	u32 *p;
	int err;

	page = alloc_page(gfp_flags);
	if (!page)
		return -ENOMEM;

	pages = &page;
	pd.pages = pages;

	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
	pd.layout_type = LAYOUT_OSD2_OBJECTS;
	pd.pages = &page;
	pd.pgbase = 0;
	pd.pglen = PAGE_SIZE;
	pd.mincount = 0;

	err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
	if (err)
		goto err_out;

	p = page_address(page);
	odi = kzalloc(sizeof(*odi), gfp_flags);
	if (!odi) {
		err = -ENOMEM;
		goto err_out;
	}
	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
	odi->page = page;
	*deviceaddr = &odi->da;
	return 0;

err_out:
	__free_page(page);
	return err;
}

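/* Release a deviceaddr obtained from objlayout_get_deviceinfo() */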
void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
{
	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
						struct objlayout_deviceinfo,
						da);

	__free_page(odi->page);
	kfree(odi);
}

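/*
 * Autologin support: when an io engine cannot reach an OSD target it
 * asks userspace (the osd_login program below) to log in to the
 * target, passing the target URI, osdname and systemid.
 */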
enum {
	OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
	OSD_LOGIN_UPCALL_PATHLEN = 256
};

static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";

module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
		    0600);
MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");

struct __auto_login {
	char uri[OBJLAYOUT_MAX_URI_LEN];
	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
};

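/*
 * Run the osd_login usermode helper with the target's URI, osdname and
 * systemid. On ENOENT/EACCES the upcall is disabled; an administrator
 * can re-enable it at runtime, e.g. (assuming the driver is loaded as
 * the objlayoutdriver module):
 *   echo /sbin/osd_login > /sys/module/objlayoutdriver/parameters/osd_login_prog
 */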
static int __objlayout_upcall(struct __auto_login *login)
{
	static char *envp[] = { "HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	char *argv[8];
	int ret;

	if (unlikely(!osd_login_prog[0])) {
		dprintk("%s: osd_login_prog is disabled\n", __func__);
		return -EACCES;
	}

	dprintk("%s uri: %s\n", __func__, login->uri);
	dprintk("%s osdname %s\n", __func__, login->osdname);
	dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);

	argv[0] = (char *)osd_login_prog;
	argv[1] = "-u";
	argv[2] = login->uri;
	argv[3] = "-o";
	argv[4] = login->osdname;
	argv[5] = "-s";
	argv[6] = login->systemid_hex;
	argv[7] = NULL;

	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
	/*
	 * Disable the upcall mechanism if we're getting an ENOENT or
	 * EACCES error. The admin can re-enable it on the fly by using
	 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
	 * the problem has been fixed.
	 */
	if (ret == -ENOENT || ret == -EACCES) {
		printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
			"objlayoutdriver.osd_login_prog kernel parameter!\n",
			osd_login_prog);
		osd_login_prog[0] = '\0';
	}
	dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);

	return ret;
}

/* Assume dest is all zeros */
static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
					   char *dest, int max_len,
					   const char *var_name)
{
	if (!s.len)
		return;

	if (s.len >= max_len) {
		pr_warn_ratelimited(
			"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
			var_name, s.len, max_len);
		s.len = max_len - 1; /* space for null terminator */
	}

	memcpy(dest, s.data, s.len);
}

/* Assume sysid is all zeros */
static void _sysid_2_hex(struct nfs4_string s,
		  char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
{
	int i;
	char *cur;

	if (!s.len)
		return;

	if (s.len != OSD_SYSTEMID_LEN) {
		pr_warn_ratelimited(
		    "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
		    s.len);
		if (s.len > OSD_SYSTEMID_LEN)
			s.len = OSD_SYSTEMID_LEN;
	}

	cur = sysid;
	for (i = 0; i < s.len; i++)
		cur = hex_byte_pack(cur, s.data[i]);
}

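/*
 * Ask userspace to log in to the OSD described by @deviceaddr.
 * Returns 0 on success, -ENODEV if there is no target address or the
 * login program failed, or a negative errno from the upcall itself.
 */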
int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
{
	int rc;
	struct __auto_login login;

	if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
		return -ENODEV;

	memset(&login, 0, sizeof(login));
	__copy_nfsS_and_zero_terminate(
		deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
		login.uri, sizeof(login.uri), "URI");

	__copy_nfsS_and_zero_terminate(
		deviceaddr->oda_osdname,
		login.osdname, sizeof(login.osdname), "OSDNAME");

	_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);

	rc = __objlayout_upcall(&login);
	if (rc > 0) /* script returns positive values */
		rc = -ENODEV;

	return rc;
}