1 /*
2  * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/pci.h>
35 #include <linux/poll.h>
36 #include <linux/cdev.h>
37 #include <linux/swap.h>
38 #include <linux/export.h>
39 #include <linux/vmalloc.h>
40 #include <linux/slab.h>
41 #include <linux/highmem.h>
42 #include <linux/io.h>
43 #include <linux/jiffies.h>
44 #include <linux/cpu.h>
45 #include <asm/pgtable.h>
46 
47 #include "ipath_kernel.h"
48 #include "ipath_common.h"
49 #include "ipath_user_sdma.h"
50 
51 static int ipath_open(struct inode *, struct file *);
52 static int ipath_close(struct inode *, struct file *);
53 static ssize_t ipath_write(struct file *, const char __user *, size_t,
54 			   loff_t *);
55 static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
56 			    unsigned long, loff_t);
57 static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
58 static int ipath_mmap(struct file *, struct vm_area_struct *);
59 
60 static const struct file_operations ipath_file_ops = {
61 	.owner = THIS_MODULE,
62 	.write = ipath_write,
63 	.aio_write = ipath_writev,
64 	.open = ipath_open,
65 	.release = ipath_close,
66 	.poll = ipath_poll,
67 	.mmap = ipath_mmap,
68 	.llseek = noop_llseek,
69 };
70 
71 /*
72  * Convert kernel virtual addresses to physical addresses so they don't
73  * potentially conflict with the chip addresses used as mmap offsets.
74  * It doesn't really matter what mmap offset we use as long as we can
75  * interpret it correctly.
76  */
77 static u64 cvt_kvaddr(void *p)
78 {
79 	struct page *page;
80 	u64 paddr = 0;
81 
82 	page = vmalloc_to_page(p);
83 	if (page)
84 		paddr = page_to_pfn(page) << PAGE_SHIFT;
85 
86 	return paddr;
87 }
88 
89 static int ipath_get_base_info(struct file *fp,
90 			       void __user *ubase, size_t ubase_size)
91 {
92 	struct ipath_portdata *pd = port_fp(fp);
93 	int ret = 0;
94 	struct ipath_base_info *kinfo = NULL;
95 	struct ipath_devdata *dd = pd->port_dd;
96 	unsigned subport_cnt;
97 	int shared, master;
98 	size_t sz;
99 
100 	subport_cnt = pd->port_subport_cnt;
101 	if (!subport_cnt) {
102 		shared = 0;
103 		master = 0;
104 		subport_cnt = 1;
105 	} else {
106 		shared = 1;
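		/* subport 0 of a shared port acts as the master */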
107 		master = !subport_fp(fp);
108 	}
109 
110 	sz = sizeof(*kinfo);
111 	/* If port sharing is not requested, allow the old size structure */
112 	if (!shared)
113 		sz -= 7 * sizeof(u64);
114 	if (ubase_size < sz) {
115 		ipath_cdbg(PROC,
116 			   "Base size %zu, need %zu (version mismatch?)\n",
117 			   ubase_size, sz);
118 		ret = -EINVAL;
119 		goto bail;
120 	}
121 
122 	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
123 	if (kinfo == NULL) {
124 		ret = -ENOMEM;
125 		goto bail;
126 	}
127 
128 	ret = dd->ipath_f_get_base_info(pd, kinfo);
129 	if (ret < 0)
130 		goto bail;
131 
132 	kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
133 	kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
134 	kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
135 	kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
136 	/*
137 	 * have to mmap whole thing
138 	 */
139 	kinfo->spi_rcv_egrbuftotlen =
140 		pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
141 	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
142 	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
143 		pd->port_rcvegrbuf_chunks;
144 	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
145 	if (master)
146 		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
147 	/*
148 	 * for this use, may be ipath_cfgports summed over all chips that
149 	 * are configured and present
150 	 */
151 	kinfo->spi_nports = dd->ipath_cfgports;
152 	/* unit (chip/board) our port is on */
153 	kinfo->spi_unit = dd->ipath_unit;
154 	/* for now, only a single page */
155 	kinfo->spi_tid_maxsize = PAGE_SIZE;
156 
157 	/*
158 	 * Doing this per port, and based on the skip value, etc.  This has
159 	 * to be the actual buffer size, since the protocol code treats it
160 	 * as an array.
161 	 *
162 	 * These have to be set to user addresses in the user code via mmap.
163 	 * These values are used on return to user code for the mmap target
164 	 * addresses only.  For 32 bit, same 44 bit address problem, so use
165 	 * the physical address, not virtual.  Before 2.6.11, using the
166 	 * page_address() macro worked, but in 2.6.11, even that returns the
167 	 * full 64 bit address (upper bits all 1's).  So far, using the
168 	 * physical addresses (or chip offsets, for chip mapping) works, but
169 	 * no doubt some future kernel release will change that, and we'll be
170 	 * on to yet another method of dealing with this.
171 	 */
172 	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
173 	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
174 	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
175 	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
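	/*
	 * The status words live in the same DMA page as the pioavail
	 * registers, so the user-visible status address is the pioavail
	 * address plus the offset of statusp within that page.
	 */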
176 	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
177 		(void *) dd->ipath_statusp -
178 		(void *) dd->ipath_pioavailregs_dma;
179 	if (!shared) {
180 		kinfo->spi_piocnt = pd->port_piocnt;
181 		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
182 		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
183 			dd->ipath_ureg_align * pd->port_port;
184 	} else if (master) {
185 		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
186 				    (pd->port_piocnt % subport_cnt);
187 		/* Master's PIO buffers are after all the slaves' */
188 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
189 			dd->ipath_palign *
190 			(pd->port_piocnt - kinfo->spi_piocnt);
191 	} else {
192 		unsigned slave = subport_fp(fp) - 1;
193 
194 		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
195 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
196 			dd->ipath_palign * kinfo->spi_piocnt * slave;
197 	}
198 
199 	if (shared) {
200 		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
201 			dd->ipath_ureg_align * pd->port_port;
202 		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
203 		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
204 		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
205 
206 		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
207 			PAGE_SIZE * subport_fp(fp));
208 
209 		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
210 			pd->port_rcvhdrq_size * subport_fp(fp));
211 		kinfo->spi_rcvhdr_tailaddr = 0;
212 		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
213 			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
214 			subport_fp(fp));
215 
216 		kinfo->spi_subport_uregbase =
217 			cvt_kvaddr(pd->subport_uregbase);
218 		kinfo->spi_subport_rcvegrbuf =
219 			cvt_kvaddr(pd->subport_rcvegrbuf);
220 		kinfo->spi_subport_rcvhdr_base =
221 			cvt_kvaddr(pd->subport_rcvhdr_base);
222 		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
223 			kinfo->spi_port, kinfo->spi_runtime_flags,
224 			(unsigned long long) kinfo->spi_subport_uregbase,
225 			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
226 			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
227 	}
228 
229 	/*
230 	 * All user buffers are 2KB buffers.  If we ever support
231 	 * giving 4KB buffers to user processes, this will need some
232 	 * work.
233 	 */
234 	kinfo->spi_pioindex = (kinfo->spi_piobufbase -
235 		(dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
236 	kinfo->spi_pioalign = dd->ipath_palign;
237 
238 	kinfo->spi_qpair = IPATH_KD_QP;
239 	/*
240 	 * user mode PIO buffers are always 2KB, even when 4KB can
241 	 * be received, and sent via the kernel; this is ibmaxlen
242 	 * for 2K MTU.
243 	 */
244 	kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
245 	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
246 	kinfo->spi_port = pd->port_port;
247 	kinfo->spi_subport = subport_fp(fp);
248 	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
249 	kinfo->spi_hw_version = dd->ipath_revision;
250 
251 	if (master) {
252 		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
253 	}
254 
255 	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
256 	if (copy_to_user(ubase, kinfo, sz))
257 		ret = -EFAULT;
258 
259 bail:
260 	kfree(kinfo);
261 	return ret;
262 }
263 
264 /**
265  * ipath_tid_update - update a port TID
266  * @pd: the port
267  * @fp: the ipath device file
268  * @ti: the TID information
269  *
270  * The new implementation as of Oct 2004 is that the driver assigns
271  * the tid and returns it to the caller.   To make it easier to
272  * catch bugs, and to reduce search time, we keep a cursor for
273  * each port, walking the shadow tid array to find one that's not
274  * in use.
275  *
276  * For now, if we can't allocate the full list, we fail, although
277  * in the long run, we'll allocate as many as we can, and the
278  * caller will deal with that by trying the remaining pages later.
279  * That means that when we fail, we have to mark the tids as not in
280  * use again, in our shadow copy.
281  *
282  * It's up to the caller to free the tids when they are done.
283  * We'll unlock the pages as they free them.
284  *
285  * Also, right now we are locking one page at a time, but since
286  * the intended use of this routine is for a single group of
287  * virtually contiguous pages, that should change to improve
288  * performance.
289  */
290 static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
291 			    const struct ipath_tid_info *ti)
292 {
293 	int ret = 0, ntids;
294 	u32 tid, porttid, cnt, i, tidcnt, tidoff;
295 	u16 *tidlist;
296 	struct ipath_devdata *dd = pd->port_dd;
297 	u64 physaddr;
298 	unsigned long vaddr;
299 	u64 __iomem *tidbase;
300 	unsigned long tidmap[8];
301 	struct page **pagep = NULL;
302 	unsigned subport = subport_fp(fp);
303 
304 	if (!dd->ipath_pageshadow) {
305 		ret = -ENOMEM;
306 		goto done;
307 	}
308 
309 	cnt = ti->tidcnt;
310 	if (!cnt) {
311 		ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
312 			  (unsigned long long) ti->tidlist);
313 		/*
314 		 * Should we treat as success?  likely a bug
315 		 */
316 		ret = -EFAULT;
317 		goto done;
318 	}
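	/* this port's TIDs start at port_port * rcvtidcnt in the shadow arrays */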
319 	porttid = pd->port_port * dd->ipath_rcvtidcnt;
320 	if (!pd->port_subport_cnt) {
321 		tidcnt = dd->ipath_rcvtidcnt;
322 		tid = pd->port_tidcursor;
323 		tidoff = 0;
324 	} else if (!subport) {
325 		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
326 			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
327 		tidoff = dd->ipath_rcvtidcnt - tidcnt;
328 		porttid += tidoff;
329 		tid = tidcursor_fp(fp);
330 	} else {
331 		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
332 		tidoff = tidcnt * (subport - 1);
333 		porttid += tidoff;
334 		tid = tidcursor_fp(fp);
335 	}
336 	if (cnt > tidcnt) {
337 		/* make sure it all fits in port_tid_pg_list */
338 		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
339 			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
340 		cnt = tidcnt;
341 	}
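	/*
	 * port_tid_pg_list was allocated at open as a single buffer holding
	 * the page pointer array followed by the u16 TID list; index both
	 * by tidoff so each subport uses its own slice.
	 */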
342 	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
343 	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
344 
345 	memset(tidmap, 0, sizeof(tidmap));
346 	/* before decrement; chip actual # */
347 	ntids = tidcnt;
348 	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
349 				   dd->ipath_rcvtidbase +
350 				   porttid * sizeof(*tidbase));
351 
352 	ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
353 		   pd->port_port, cnt, tid, tidbase);
354 
355 	/* virtual address of first page in transfer */
356 	vaddr = ti->tidvaddr;
357 	if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
358 		       cnt * PAGE_SIZE)) {
359 		ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
360 			  (void *)vaddr, cnt);
361 		ret = -EFAULT;
362 		goto done;
363 	}
364 	ret = ipath_get_user_pages(vaddr, cnt, pagep);
365 	if (ret) {
366 		if (ret == -EBUSY) {
367 			ipath_dbg("Failed to lock addr %p, %u pages "
368 				  "(already locked)\n",
369 				  (void *) vaddr, cnt);
370 			/*
371 			 * for now, continue, and see what happens but with
372 			 * the new implementation, this should never happen,
373 			 * unless perhaps the user has mpin'ed the pages
374 			 * themselves (something we need to test)
375 			 */
376 			ret = 0;
377 		} else {
378 			dev_info(&dd->pcidev->dev,
379 				 "Failed to lock addr %p, %u pages: "
380 				 "errno %d\n", (void *) vaddr, cnt, -ret);
381 			goto done;
382 		}
383 	}
384 	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
385 		for (; ntids--; tid++) {
386 			if (tid == tidcnt)
387 				tid = 0;
388 			if (!dd->ipath_pageshadow[porttid + tid])
389 				break;
390 		}
391 		if (ntids < 0) {
392 			/*
393 			 * oops, wrapped all the way through their TIDs,
394 			 * and didn't have enough free; see comments at
395 			 * start of routine
396 			 */
397 			ipath_dbg("Not enough free TIDs for %u pages "
398 				  "(index %d), failing\n", cnt, i);
399 			i--;	/* last tidlist[i] not filled in */
400 			ret = -ENOMEM;
401 			break;
402 		}
403 		tidlist[i] = tid + tidoff;
404 		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
405 			   "vaddr %lx\n", i, tid + tidoff, vaddr);
406 		/* we "know" system pages and TID pages are same size */
407 		dd->ipath_pageshadow[porttid + tid] = pagep[i];
408 		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
409 			dd->pcidev, pagep[i], 0, PAGE_SIZE,
410 			PCI_DMA_FROMDEVICE);
411 		/*
412 		 * don't need an atomic op here, or its overhead
413 		 */
414 		__set_bit(tid, tidmap);
415 		physaddr = dd->ipath_physshadow[porttid + tid];
416 		ipath_stats.sps_pagelocks++;
417 		ipath_cdbg(VERBOSE,
418 			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
419 			   tid, vaddr, (unsigned long long) physaddr,
420 			   pagep[i]);
421 		dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
422 				    physaddr);
423 		/*
424 		 * don't check this tid in ipath_portshadow, since we
425 		 * just filled it in; start with the next one.
426 		 */
427 		tid++;
428 	}
429 
430 	if (ret) {
431 		u32 limit;
432 	cleanup:
433 		/* jump here if copy out of updated info failed... */
434 		ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
435 			  -ret, i, cnt);
436 		/* same code that's in ipath_free_tid() */
437 		limit = sizeof(tidmap) * BITS_PER_BYTE;
438 		if (limit > tidcnt)
439 			/* just in case size changes in future */
440 			limit = tidcnt;
441 		tid = find_first_bit((const unsigned long *)tidmap, limit);
442 		for (; tid < limit; tid++) {
443 			if (!test_bit(tid, tidmap))
444 				continue;
445 			if (dd->ipath_pageshadow[porttid + tid]) {
446 				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
447 					   tid);
448 				dd->ipath_f_put_tid(dd, &tidbase[tid],
449 						    RCVHQ_RCV_TYPE_EXPECTED,
450 						    dd->ipath_tidinvalid);
451 				pci_unmap_page(dd->pcidev,
452 					dd->ipath_physshadow[porttid + tid],
453 					PAGE_SIZE, PCI_DMA_FROMDEVICE);
454 				dd->ipath_pageshadow[porttid + tid] = NULL;
455 				ipath_stats.sps_pageunlocks++;
456 			}
457 		}
458 		ipath_release_user_pages(pagep, cnt);
459 	} else {
460 		/*
461 		 * Copy the updated array, with ipath_tid's filled in, back
462 		 * to user.  Since we did the copy in already, this "should
463 		 * never fail".  If it does, we have to clean up...
464 		 */
465 		if (copy_to_user((void __user *)
466 				 (unsigned long) ti->tidlist,
467 				 tidlist, cnt * sizeof(*tidlist))) {
468 			ret = -EFAULT;
469 			goto cleanup;
470 		}
471 		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
472 				 tidmap, sizeof tidmap)) {
473 			ret = -EFAULT;
474 			goto cleanup;
475 		}
476 		if (tid == tidcnt)
477 			tid = 0;
478 		if (!pd->port_subport_cnt)
479 			pd->port_tidcursor = tid;
480 		else
481 			tidcursor_fp(fp) = tid;
482 	}
483 
484 done:
485 	if (ret)
486 		ipath_dbg("Failed to map %u TID pages, failing with %d\n",
487 			  ti->tidcnt, -ret);
488 	return ret;
489 }
490 
491 /**
492  * ipath_tid_free - free a port TID
493  * @pd: the port
494  * @subport: the subport
495  * @ti: the TID info
496  *
497  * right now we are unlocking one page at a time, but since
498  * the intended use of this routine is for a single group of
499  * virtually contiguous pages, that should change to improve
500  * performance.  We check that the TID is in range for this port
501  * but otherwise don't check validity; if user has an error and
502  * frees the wrong tid, it's only their own data that can thereby
503 		 * be corrupted.  We do check that the TID was in use, for sanity.
504  * We always use our idea of the saved address, not the address that
505  * they pass in to us.
506  */
507 
508 static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
509 			  const struct ipath_tid_info *ti)
510 {
511 	int ret = 0;
512 	u32 tid, porttid, cnt, limit, tidcnt;
513 	struct ipath_devdata *dd = pd->port_dd;
514 	u64 __iomem *tidbase;
515 	unsigned long tidmap[8];
516 
517 	if (!dd->ipath_pageshadow) {
518 		ret = -ENOMEM;
519 		goto done;
520 	}
521 
522 	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
523 			   sizeof tidmap)) {
524 		ret = -EFAULT;
525 		goto done;
526 	}
527 
528 	porttid = pd->port_port * dd->ipath_rcvtidcnt;
529 	if (!pd->port_subport_cnt)
530 		tidcnt = dd->ipath_rcvtidcnt;
531 	else if (!subport) {
532 		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
533 			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
534 		porttid += dd->ipath_rcvtidcnt - tidcnt;
535 	} else {
536 		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
537 		porttid += tidcnt * (subport - 1);
538 	}
539 	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
540 				   dd->ipath_rcvtidbase +
541 				   porttid * sizeof(*tidbase));
542 
543 	limit = sizeof(tidmap) * BITS_PER_BYTE;
544 	if (limit > tidcnt)
545 		/* just in case size changes in future */
546 		limit = tidcnt;
547 	tid = find_first_bit(tidmap, limit);
548 	ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
549 		   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
550 		   limit, tid, porttid);
551 	for (cnt = 0; tid < limit; tid++) {
552 		/*
553 		 * small optimization; if we detect a run of 3 or so without
554 		 * any set, use find_first_bit again.  That's mainly to
555 		 * accelerate the case where we wrapped, so we have some at
556 		 * the beginning, and some at the end, and a big gap
557 		 * in the middle.
558 		 */
559 		if (!test_bit(tid, tidmap))
560 			continue;
561 		cnt++;
562 		if (dd->ipath_pageshadow[porttid + tid]) {
563 			struct page *p;
564 			p = dd->ipath_pageshadow[porttid + tid];
565 			dd->ipath_pageshadow[porttid + tid] = NULL;
566 			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
567 				   pid_nr(pd->port_pid), tid);
568 			dd->ipath_f_put_tid(dd, &tidbase[tid],
569 					    RCVHQ_RCV_TYPE_EXPECTED,
570 					    dd->ipath_tidinvalid);
571 			pci_unmap_page(dd->pcidev,
572 				dd->ipath_physshadow[porttid + tid],
573 				PAGE_SIZE, PCI_DMA_FROMDEVICE);
574 			ipath_release_user_pages(&p, 1);
575 			ipath_stats.sps_pageunlocks++;
576 		} else
577 			ipath_dbg("Unused tid %u, ignoring\n", tid);
578 	}
579 	if (cnt != ti->tidcnt)
580 		ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
581 			  ti->tidcnt, cnt);
582 done:
583 	if (ret)
584 		ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
585 			  ti->tidcnt, -ret);
586 	return ret;
587 }
588 
589 /**
590  * ipath_set_part_key - set a partition key
591  * @pd: the port
592  * @key: the key
593  *
594  * We can have up to 4 active at a time (other than the default, which is
595  * always allowed).  This is somewhat tricky, since multiple ports may set
596  * the same key, so we reference count them, and clean up at exit.  All 4
597  * partition keys are packed into a single infinipath register.  It's an
598  * error for a process to set the same pkey multiple times.  We provide no
599  * mechanism to de-allocate a pkey at this time; we may eventually need to
600  * do that.  I've used the atomic operations, and no locking, and only make
601  * a single pass through what's available.  This should be more than
602  * adequate for some time. I'll think about spinlocks or the like if and as
603  * it's necessary.
604  */
605 static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
606 {
607 	struct ipath_devdata *dd = pd->port_dd;
608 	int i, any = 0, pidx = -1;
609 	u16 lkey = key & 0x7FFF;
610 	int ret;
611 
612 	if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
613 		/* nothing to do; this key is always valid */
614 		ret = 0;
615 		goto bail;
616 	}
617 
618 	ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
619 		   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
620 		   pd->port_port, key, dd->ipath_pkeys[0],
621 		   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
622 		   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
623 		   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
624 		   atomic_read(&dd->ipath_pkeyrefs[3]));
625 
626 	if (!lkey) {
627 		ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
628 			   pd->port_port);
629 		ret = -EINVAL;
630 		goto bail;
631 	}
632 
633 	/*
634 	 * Set the full membership bit, because it has to be
635 	 * set in the register or the packet, and it seems
636 	 * cleaner to set in the register than to force all
637 	 * callers to set it. (see bug 4331)
638 	 */
639 	key |= 0x8000;
640 
641 	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
642 		if (!pd->port_pkeys[i] && pidx == -1)
643 			pidx = i;
644 		if (pd->port_pkeys[i] == key) {
645 			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
646 				   "(%x) more than once\n",
647 				   pd->port_port, key);
648 			ret = -EEXIST;
649 			goto bail;
650 		}
651 	}
652 	if (pidx == -1) {
653 		ipath_dbg("All pkeys for port %u already in use, "
654 			  "can't set %x\n", pd->port_port, key);
655 		ret = -EBUSY;
656 		goto bail;
657 	}
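	/*
	 * First pass: if another port already uses this key, just take a
	 * reference on it; also note whether any slots are currently free.
	 */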
658 	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
659 		if (!dd->ipath_pkeys[i]) {
660 			any++;
661 			continue;
662 		}
663 		if (dd->ipath_pkeys[i] == key) {
664 			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
665 
666 			if (atomic_inc_return(pkrefs) > 1) {
667 				pd->port_pkeys[pidx] = key;
668 				ipath_cdbg(VERBOSE, "p%u set key %x "
669 					   "matches #%d, count now %d\n",
670 					   pd->port_port, key, i,
671 					   atomic_read(pkrefs));
672 				ret = 0;
673 				goto bail;
674 			} else {
675 				/*
676 				 * lost race, decrement count, catch below
677 				 */
678 				atomic_dec(pkrefs);
679 				ipath_cdbg(VERBOSE, "Lost race, count was "
680 					   "0, after dec, it's %d\n",
681 					   atomic_read(pkrefs));
682 				any++;
683 			}
684 		}
685 		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
686 			/*
687 			 * It makes no sense to have both the limited and
688 			 * full membership PKEY set at the same time since
689 			 * the unlimited one will disable the limited one.
690 			 */
691 			ret = -EEXIST;
692 			goto bail;
693 		}
694 	}
695 	if (!any) {
696 		ipath_dbg("port %u, all pkeys already in use, "
697 			  "can't set %x\n", pd->port_port, key);
698 		ret = -EBUSY;
699 		goto bail;
700 	}
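	/*
	 * Second pass: no existing entry matched, so claim the first free
	 * slot (reference count 0 -> 1) and write the new set of four pkeys
	 * to the chip's partition key register.
	 */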
701 	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
702 		if (!dd->ipath_pkeys[i] &&
703 		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
704 			u64 pkey;
705 
706 			/* for ipathstats, etc. */
707 			ipath_stats.sps_pkeys[i] = lkey;
708 			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
709 			pkey =
710 				(u64) dd->ipath_pkeys[0] |
711 				((u64) dd->ipath_pkeys[1] << 16) |
712 				((u64) dd->ipath_pkeys[2] << 32) |
713 				((u64) dd->ipath_pkeys[3] << 48);
714 			ipath_cdbg(PROC, "p%u set key %x in #%d, "
715 				   "portidx %d, new pkey reg %llx\n",
716 				   pd->port_port, key, i, pidx,
717 				   (unsigned long long) pkey);
718 			ipath_write_kreg(
719 				dd, dd->ipath_kregs->kr_partitionkey, pkey);
720 
721 			ret = 0;
722 			goto bail;
723 		}
724 	}
725 	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
726 		  "can't set %x\n", pd->port_port, key);
727 	ret = -EBUSY;
728 
729 bail:
730 	return ret;
731 }
732 
733 /**
734  * ipath_manage_rcvq - manage a port's receive queue
735  * @pd: the port
736  * @subport: the subport
737  * @start_stop: action to carry out
738  *
739  * start_stop == 0 disables receive on the port, for use in queue
740  * overflow conditions.  start_stop == 1 re-enables, to be used to
741  * re-init the software copy of the head register.
742  */
743 static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
744 			     int start_stop)
745 {
746 	struct ipath_devdata *dd = pd->port_dd;
747 
748 	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
749 		   start_stop ? "en" : "dis", dd->ipath_unit,
750 		   pd->port_port, subport);
751 	if (subport)
752 		goto bail;
753 	/* atomically clear receive enable port. */
754 	if (start_stop) {
755 		/*
756 		 * On enable, force in-memory copy of the tail register to
757 		 * 0, so that protocol code doesn't have to worry about
758 		 * whether or not the chip has yet updated the in-memory
759 		 * copy or not on return from the system call. The chip
760 		 * always resets its tail register back to 0 on a
761 		 * transition from disabled to enabled.  This could cause a
762 		 * problem if software was broken, and did the enable w/o
763 		 * the disable, but eventually the in-memory copy will be
764 		 * updated and correct itself, even in the face of software
765 		 * bugs.
766 		 */
767 		if (pd->port_rcvhdrtail_kvaddr)
768 			ipath_clear_rcvhdrtail(pd);
769 		set_bit(dd->ipath_r_portenable_shift + pd->port_port,
770 			&dd->ipath_rcvctrl);
771 	} else
772 		clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
773 			  &dd->ipath_rcvctrl);
774 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
775 			 dd->ipath_rcvctrl);
776 	/* now be sure chip saw it before we return */
777 	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
778 	if (start_stop) {
779 		/*
780 		 * And try to be sure that tail reg update has happened too.
781 		 * This should in theory interlock with the RXE changes to
782 		 * the tail register.  Don't assign it to the tail register
783 		 * in memory copy, since we could overwrite an update by the
784 		 * chip if we did.
785 		 */
786 		ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
787 	}
788 	/* always; new head should be equal to new tail; see above */
789 bail:
790 	return 0;
791 }
792 
793 static void ipath_clean_part_key(struct ipath_portdata *pd,
794 				 struct ipath_devdata *dd)
795 {
796 	int i, j, pchanged = 0;
797 	u64 oldpkey;
798 
799 	/* for debugging only */
800 	oldpkey = (u64) dd->ipath_pkeys[0] |
801 		((u64) dd->ipath_pkeys[1] << 16) |
802 		((u64) dd->ipath_pkeys[2] << 32) |
803 		((u64) dd->ipath_pkeys[3] << 48);
804 
805 	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
806 		if (!pd->port_pkeys[i])
807 			continue;
808 		ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
809 			   pd->port_pkeys[i]);
810 		for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
811 			/* check for match independent of the global bit */
812 			if ((dd->ipath_pkeys[j] & 0x7fff) !=
813 			    (pd->port_pkeys[i] & 0x7fff))
814 				continue;
815 			if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
816 				ipath_cdbg(VERBOSE, "p%u clear key "
817 					   "%x matches #%d\n",
818 					   pd->port_port,
819 					   pd->port_pkeys[i], j);
820 				ipath_stats.sps_pkeys[j] =
821 					dd->ipath_pkeys[j] = 0;
822 				pchanged++;
823 			}
824 			else ipath_cdbg(
825 				VERBOSE, "p%u key %x matches #%d, "
826 				"but ref still %d\n", pd->port_port,
827 				pd->port_pkeys[i], j,
828 				atomic_read(&dd->ipath_pkeyrefs[j]));
829 			break;
830 		}
831 		pd->port_pkeys[i] = 0;
832 	}
833 	if (pchanged) {
834 		u64 pkey = (u64) dd->ipath_pkeys[0] |
835 			((u64) dd->ipath_pkeys[1] << 16) |
836 			((u64) dd->ipath_pkeys[2] << 32) |
837 			((u64) dd->ipath_pkeys[3] << 48);
838 		ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
839 			   "new pkey reg %llx\n", pd->port_port,
840 			   (unsigned long long) oldpkey,
841 			   (unsigned long long) pkey);
842 		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
843 				 pkey);
844 	}
845 }
846 
847 /*
848  * Initialize the port data with the receive buffer sizes
849  * so this can be done while the master port is locked.
850  * Otherwise, there is a race with a slave opening the port
851  * and seeing these fields uninitialized.
852  */
853 static void init_user_egr_sizes(struct ipath_portdata *pd)
854 {
855 	struct ipath_devdata *dd = pd->port_dd;
856 	unsigned egrperchunk, egrcnt, size;
857 
858 	/*
859 	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
860 	 * physically contiguous memory, advance through it until used up
861 	 * and then allocate more.  Of course, we need memory to store those
862 	 * extra pointers, now.  Started out with 256KB, but under heavy
863 	 * memory pressure (creating large files and then copying them over
864 	 * NFS while doing lots of MPI jobs), we hit some allocation
865 	 * failures, even though we can sleep...  (2.6.10) Still get
866 	 * failures at 64K.  32K is the lowest we can go without wasting
867 	 * additional memory.
868 	 */
869 	size = 0x8000;
870 	egrperchunk = size / dd->ipath_rcvegrbufsize;
871 	egrcnt = dd->ipath_rcvegrcnt;
872 	pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
873 	pd->port_rcvegrbufs_perchunk = egrperchunk;
874 	pd->port_rcvegrbuf_size = size;
875 }
876 
877 /**
878  * ipath_create_user_egr - allocate eager TID buffers
879  * @pd: the port to allocate TID buffers for
880  *
881  * This routine is now quite different for user and kernel, because
882  * the kernel uses skb's for accelerated network performance.
883  * This is the user port version.
884  *
885  * Allocate the eager TID buffers and program them into infinipath.
886  * They are no longer completely contiguous; we do multiple allocation
887  * calls.
888  */
889 static int ipath_create_user_egr(struct ipath_portdata *pd)
890 {
891 	struct ipath_devdata *dd = pd->port_dd;
892 	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
893 	size_t size;
894 	int ret;
895 	gfp_t gfp_flags;
896 
897 	/*
898 	 * GFP_USER, but without GFP_FS, so buffer cache can be
899 	 * coalesced (we hope); otherwise, even at order 4,
900 	 * heavy filesystem activity makes these fail, and we can
901 	 * use compound pages.
902 	 */
903 	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
904 
905 	egrcnt = dd->ipath_rcvegrcnt;
906 	/* TID number offset for this port */
907 	egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
908 	egrsize = dd->ipath_rcvegrbufsize;
909 	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
910 		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
911 
912 	chunk = pd->port_rcvegrbuf_chunks;
913 	egrperchunk = pd->port_rcvegrbufs_perchunk;
914 	size = pd->port_rcvegrbuf_size;
915 	pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
916 				     GFP_KERNEL);
917 	if (!pd->port_rcvegrbuf) {
918 		ret = -ENOMEM;
919 		goto bail;
920 	}
921 	pd->port_rcvegrbuf_phys =
922 		kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
923 			GFP_KERNEL);
924 	if (!pd->port_rcvegrbuf_phys) {
925 		ret = -ENOMEM;
926 		goto bail_rcvegrbuf;
927 	}
928 	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
929 
930 		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
931 			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
932 			gfp_flags);
933 
934 		if (!pd->port_rcvegrbuf[e]) {
935 			ret = -ENOMEM;
936 			goto bail_rcvegrbuf_phys;
937 		}
938 	}
939 
940 	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
941 
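	/* program each eager buffer's DMA address into the chip's eager TID array */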
942 	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
943 		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
944 		unsigned i;
945 
946 		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
947 			dd->ipath_f_put_tid(dd, e + egroff +
948 					    (u64 __iomem *)
949 					    ((char __iomem *)
950 					     dd->ipath_kregbase +
951 					     dd->ipath_rcvegrbase),
952 					    RCVHQ_RCV_TYPE_EAGER, pa);
953 			pa += egrsize;
954 		}
955 		cond_resched();	/* don't hog the cpu */
956 	}
957 
958 	ret = 0;
959 	goto bail;
960 
961 bail_rcvegrbuf_phys:
962 	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
963 		pd->port_rcvegrbuf[e]; e++) {
964 		dma_free_coherent(&dd->pcidev->dev, size,
965 				  pd->port_rcvegrbuf[e],
966 				  pd->port_rcvegrbuf_phys[e]);
967 
968 	}
969 	kfree(pd->port_rcvegrbuf_phys);
970 	pd->port_rcvegrbuf_phys = NULL;
971 bail_rcvegrbuf:
972 	kfree(pd->port_rcvegrbuf);
973 	pd->port_rcvegrbuf = NULL;
974 bail:
975 	return ret;
976 }
977 
978 
979 /* common code for the mappings on dma_alloc_coherent mem */
980 static int ipath_mmap_mem(struct vm_area_struct *vma,
981 	struct ipath_portdata *pd, unsigned len, int write_ok,
982 	void *kvaddr, char *what)
983 {
984 	struct ipath_devdata *dd = pd->port_dd;
985 	unsigned long pfn;
986 	int ret;
987 
988 	if ((vma->vm_end - vma->vm_start) > len) {
989 		dev_info(&dd->pcidev->dev,
990 		         "FAIL on %s: len %lx > %x\n", what,
991 			 vma->vm_end - vma->vm_start, len);
992 		ret = -EFAULT;
993 		goto bail;
994 	}
995 
996 	if (!write_ok) {
997 		if (vma->vm_flags & VM_WRITE) {
998 			dev_info(&dd->pcidev->dev,
999 				 "%s must be mapped readonly\n", what);
1000 			ret = -EPERM;
1001 			goto bail;
1002 		}
1003 
1004 		/* don't allow them to later change with mprotect */
1005 		vma->vm_flags &= ~VM_MAYWRITE;
1006 	}
1007 
1008 	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
1009 	ret = remap_pfn_range(vma, vma->vm_start, pfn,
1010 			      len, vma->vm_page_prot);
1011 	if (ret)
1012 		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
1013 			 "bytes r%c failed: %d\n", what, pd->port_port,
1014 			 pfn, len, write_ok?'w':'o', ret);
1015 	else
1016 		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
1017 			   "r%c\n", what, pd->port_port, pfn, len,
1018 			   write_ok?'w':'o');
1019 bail:
1020 	return ret;
1021 }
1022 
1023 static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
1024 		     u64 ureg)
1025 {
1026 	unsigned long phys;
1027 	int ret;
1028 
1029 	/*
1030 	 * This is real hardware, so use io_remap.  This is the mechanism
1031 	 * for the user process to update the head registers for their port
1032 	 * in the chip.
1033 	 */
1034 	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
1035 		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
1036 			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
1037 		ret = -EFAULT;
1038 	} else {
1039 		phys = dd->ipath_physaddr + ureg;
1040 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1041 
1042 		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1043 		ret = io_remap_pfn_range(vma, vma->vm_start,
1044 					 phys >> PAGE_SHIFT,
1045 					 vma->vm_end - vma->vm_start,
1046 					 vma->vm_page_prot);
1047 	}
1048 	return ret;
1049 }
1050 
1051 static int mmap_piobufs(struct vm_area_struct *vma,
1052 			struct ipath_devdata *dd,
1053 			struct ipath_portdata *pd,
1054 			unsigned piobufs, unsigned piocnt)
1055 {
1056 	unsigned long phys;
1057 	int ret;
1058 
1059 	/*
1060 	 * When we map the PIO buffers in the chip, we want to map them as
1061 	 * writeonly, no read possible.   This prevents access to previous
1062 	 * process data, and catches users who might try to read the i/o
1063 	 * space due to a bug.
1064 	 */
1065 	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
1066 		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
1067 			 "reqlen %lx > PAGE\n",
1068 			 vma->vm_end - vma->vm_start);
1069 		ret = -EINVAL;
1070 		goto bail;
1071 	}
1072 
1073 	phys = dd->ipath_physaddr + piobufs;
1074 
1075 #if defined(__powerpc__)
1076 	/* There isn't a generic way to specify writethrough mappings */
1077 	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
1078 	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
1079 	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
1080 #endif
1081 
1082 	/*
1083 	 * don't allow them to later change to readable with mprotect (for when
1084 	 * not initially mapped readable, as is normally the case)
1085 	 */
1086 	vma->vm_flags &= ~VM_MAYREAD;
1087 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1088 
1089 	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
1090 				 vma->vm_end - vma->vm_start,
1091 				 vma->vm_page_prot);
1092 bail:
1093 	return ret;
1094 }
1095 
1096 static int mmap_rcvegrbufs(struct vm_area_struct *vma,
1097 			   struct ipath_portdata *pd)
1098 {
1099 	struct ipath_devdata *dd = pd->port_dd;
1100 	unsigned long start, size;
1101 	size_t total_size, i;
1102 	unsigned long pfn;
1103 	int ret;
1104 
1105 	size = pd->port_rcvegrbuf_size;
1106 	total_size = pd->port_rcvegrbuf_chunks * size;
1107 	if ((vma->vm_end - vma->vm_start) > total_size) {
1108 		dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
1109 			 "reqlen %lx > actual %lx\n",
1110 			 vma->vm_end - vma->vm_start,
1111 			 (unsigned long) total_size);
1112 		ret = -EINVAL;
1113 		goto bail;
1114 	}
1115 
1116 	if (vma->vm_flags & VM_WRITE) {
1117 		dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
1118 			 "writable (flags=%lx)\n", vma->vm_flags);
1119 		ret = -EPERM;
1120 		goto bail;
1121 	}
1122 	/* don't allow them to later change to writeable with mprotect */
1123 	vma->vm_flags &= ~VM_MAYWRITE;
1124 
1125 	start = vma->vm_start;
1126 
1127 	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
1128 		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
1129 		ret = remap_pfn_range(vma, start, pfn, size,
1130 				      vma->vm_page_prot);
1131 		if (ret < 0)
1132 			goto bail;
1133 	}
1134 	ret = 0;
1135 
1136 bail:
1137 	return ret;
1138 }
1139 
1140 /*
1141  * ipath_file_vma_fault - handle a VMA page fault.
1142  */
1143 static int ipath_file_vma_fault(struct vm_area_struct *vma,
1144 					struct vm_fault *vmf)
1145 {
1146 	struct page *page;
1147 
1148 	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
1149 	if (!page)
1150 		return VM_FAULT_SIGBUS;
1151 	get_page(page);
1152 	vmf->page = page;
1153 
1154 	return 0;
1155 }
1156 
1157 static const struct vm_operations_struct ipath_file_vm_ops = {
1158 	.fault = ipath_file_vma_fault,
1159 };
1160 
1161 static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1162 		       struct ipath_portdata *pd, unsigned subport)
1163 {
1164 	unsigned long len;
1165 	struct ipath_devdata *dd;
1166 	void *addr;
1167 	size_t size;
1168 	int ret = 0;
1169 
1170 	/* If the port is not shared, all addresses should be physical */
1171 	if (!pd->port_subport_cnt)
1172 		goto bail;
1173 
1174 	dd = pd->port_dd;
1175 	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
1176 
1177 	/*
1178 	 * Each process has all the subport uregbase, rcvhdrq, and
1179 	 * rcvegrbufs mmapped - as an array for all the processes,
1180 	 * and also separately for this process.
1181 	 */
1182 	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
1183 		addr = pd->subport_uregbase;
1184 		size = PAGE_SIZE * pd->port_subport_cnt;
1185 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
1186 		addr = pd->subport_rcvhdr_base;
1187 		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
1188 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
1189 		addr = pd->subport_rcvegrbuf;
1190 		size *= pd->port_subport_cnt;
1191 	} else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
1192 					PAGE_SIZE * subport)) {
1193 		addr = pd->subport_uregbase + PAGE_SIZE * subport;
1194 		size = PAGE_SIZE;
1195 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
1196 				pd->port_rcvhdrq_size * subport)) {
1197 		addr = pd->subport_rcvhdr_base +
1198 			pd->port_rcvhdrq_size * subport;
1199 		size = pd->port_rcvhdrq_size;
1200 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
1201 				size * subport)) {
1202 		addr = pd->subport_rcvegrbuf + size * subport;
1203 		/* rcvegrbufs are read-only on the slave */
1204 		if (vma->vm_flags & VM_WRITE) {
1205 			dev_info(&dd->pcidev->dev,
1206 				 "Can't map eager buffers as "
1207 				 "writable (flags=%lx)\n", vma->vm_flags);
1208 			ret = -EPERM;
1209 			goto bail;
1210 		}
1211 		/*
1212 		 * Don't allow permission to later change to writeable
1213 		 * with mprotect.
1214 		 */
1215 		vma->vm_flags &= ~VM_MAYWRITE;
1216 	} else {
1217 		goto bail;
1218 	}
1219 	len = vma->vm_end - vma->vm_start;
1220 	if (len > size) {
1221 		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
1222 		ret = -EINVAL;
1223 		goto bail;
1224 	}
1225 
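	/*
	 * Remember the kernel virtual address in vm_pgoff so that
	 * ipath_file_vma_fault() can look the page up with
	 * vmalloc_to_page() when it is touched.
	 */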
1226 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
1227 	vma->vm_ops = &ipath_file_vm_ops;
1228 	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
1229 	ret = 1;
1230 
1231 bail:
1232 	return ret;
1233 }
1234 
1235 /**
1236  * ipath_mmap - mmap various structures into user space
1237  * @fp: the file pointer
1238  * @vma: the VM area
1239  *
1240  * We use this to have a shared buffer between the kernel and the user code
1241  * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
1242  * buffers in the chip.  We have the open and close entries so we can bump
1243  * the ref count and keep the driver from being unloaded while still mapped.
1244  */
1245 static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1246 {
1247 	struct ipath_portdata *pd;
1248 	struct ipath_devdata *dd;
1249 	u64 pgaddr, ureg;
1250 	unsigned piobufs, piocnt;
1251 	int ret;
1252 
1253 	pd = port_fp(fp);
1254 	if (!pd) {
1255 		ret = -EINVAL;
1256 		goto bail;
1257 	}
1258 	dd = pd->port_dd;
1259 
1260 	/*
1261 	 * This is the ipath_do_user_init() code, mapping the shared buffers
1262 	 * into the user process. The address referred to by vm_pgoff is the
1263 	 * file offset passed via mmap().  For shared ports, this is the
1264 	 * kernel vmalloc() address of the pages to share with the master.
1265 	 * For non-shared or master ports, this is a physical address.
1266 	 * We only do one mmap for each space mapped.
1267 	 */
1268 	pgaddr = vma->vm_pgoff << PAGE_SHIFT;
1269 
1270 	/*
1271 	 * Check for 0 in case one of the allocations failed, but user
1272 	 * called mmap anyway.
1273 	 */
1274 	if (!pgaddr)  {
1275 		ret = -EINVAL;
1276 		goto bail;
1277 	}
1278 
1279 	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
1280 		   (unsigned long long) pgaddr, vma->vm_start,
1281 		   vma->vm_end - vma->vm_start, dd->ipath_unit,
1282 		   pd->port_port, subport_fp(fp));
1283 
1284 	/*
1285 	 * Physical addresses must fit in 40 bits for our hardware.
1286 	 * Check for kernel virtual addresses first, anything else must
1287 	 * match a HW or memory address.
1288 	 */
1289 	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
1290 	if (ret) {
1291 		if (ret > 0)
1292 			ret = 0;
1293 		goto bail;
1294 	}
1295 
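	/* user register page for this port within the chip's register space */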
1296 	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
1297 	if (!pd->port_subport_cnt) {
1298 		/* port is not shared */
1299 		piocnt = pd->port_piocnt;
1300 		piobufs = pd->port_piobufs;
1301 	} else if (!subport_fp(fp)) {
1302 		/* caller is the master */
1303 		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
1304 			 (pd->port_piocnt % pd->port_subport_cnt);
1305 		piobufs = pd->port_piobufs +
1306 			dd->ipath_palign * (pd->port_piocnt - piocnt);
1307 	} else {
1308 		unsigned slave = subport_fp(fp) - 1;
1309 
1310 		/* caller is a slave */
1311 		piocnt = pd->port_piocnt / pd->port_subport_cnt;
1312 		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1313 	}
1314 
1315 	if (pgaddr == ureg)
1316 		ret = mmap_ureg(vma, dd, ureg);
1317 	else if (pgaddr == piobufs)
1318 		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
1319 	else if (pgaddr == dd->ipath_pioavailregs_phys)
1320 		/* in-memory copy of pioavail registers */
1321 		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1322 			      	     (void *) dd->ipath_pioavailregs_dma,
1323 				     "pioavail registers");
1324 	else if (pgaddr == pd->port_rcvegr_phys)
1325 		ret = mmap_rcvegrbufs(vma, pd);
1326 	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
1327 		/*
1328 		 * The rcvhdrq itself; readonly except on HT (so have
1329 		 * to allow writable mapping), multiple pages, contiguous
1330 		 * from an i/o perspective.
1331 		 */
1332 		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
1333 				     pd->port_rcvhdrq,
1334 				     "rcvhdrq");
1335 	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
1336 		/* in-memory copy of rcvhdrq tail register */
1337 		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1338 				     pd->port_rcvhdrtail_kvaddr,
1339 				     "rcvhdrq tail");
1340 	else
1341 		ret = -EINVAL;
1342 
1343 	vma->vm_private_data = NULL;
1344 
1345 	if (ret < 0)
1346 		dev_info(&dd->pcidev->dev,
1347 			 "Failure %d on off %llx len %lx\n",
1348 			 -ret, (unsigned long long)pgaddr,
1349 			 vma->vm_end - vma->vm_start);
1350 bail:
1351 	return ret;
1352 }
1353 
1354 static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
1355 {
1356 	unsigned pollflag = 0;
1357 
1358 	if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
1359 	    pd->port_hdrqfull != pd->port_hdrqfull_poll) {
1360 		pollflag |= POLLIN | POLLRDNORM;
1361 		pd->port_hdrqfull_poll = pd->port_hdrqfull;
1362 	}
1363 
1364 	return pollflag;
1365 }
1366 
1367 static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
1368 				      struct file *fp,
1369 				      struct poll_table_struct *pt)
1370 {
1371 	unsigned pollflag = 0;
1372 	struct ipath_devdata *dd;
1373 
1374 	dd = pd->port_dd;
1375 
1376 	/* variable access in ipath_poll_hdrqfull() needs this */
1377 	rmb();
1378 	pollflag = ipath_poll_hdrqfull(pd);
1379 
1380 	if (pd->port_urgent != pd->port_urgent_poll) {
1381 		pollflag |= POLLIN | POLLRDNORM;
1382 		pd->port_urgent_poll = pd->port_urgent;
1383 	}
1384 
1385 	if (!pollflag) {
1386 		/* this saves a spin_lock/unlock in interrupt handler... */
1387 		set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
1388 		/* flush waiting flag so don't miss an event... */
1389 		wmb();
1390 		poll_wait(fp, &pd->port_wait, pt);
1391 	}
1392 
1393 	return pollflag;
1394 }
1395 
1396 static unsigned int ipath_poll_next(struct ipath_portdata *pd,
1397 				    struct file *fp,
1398 				    struct poll_table_struct *pt)
1399 {
1400 	u32 head;
1401 	u32 tail;
1402 	unsigned pollflag = 0;
1403 	struct ipath_devdata *dd;
1404 
1405 	dd = pd->port_dd;
1406 
1407 	/* variable access in ipath_poll_hdrqfull() needs this */
1408 	rmb();
1409 	pollflag = ipath_poll_hdrqfull(pd);
1410 
1411 	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
1412 	if (pd->port_rcvhdrtail_kvaddr)
1413 		tail = ipath_get_rcvhdrtail(pd);
1414 	else
1415 		tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
1416 
1417 	if (head != tail)
1418 		pollflag |= POLLIN | POLLRDNORM;
1419 	else {
1420 		/* this saves a spin_lock/unlock in interrupt handler */
1421 		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1422 		/* flush waiting flag so we don't miss an event */
1423 		wmb();
1424 
1425 		set_bit(pd->port_port + dd->ipath_r_intravail_shift,
1426 			&dd->ipath_rcvctrl);
1427 
1428 		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1429 				 dd->ipath_rcvctrl);
1430 
1431 		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
1432 			ipath_write_ureg(dd, ur_rcvhdrhead,
1433 					 dd->ipath_rhdrhead_intr_off | head,
1434 					 pd->port_port);
1435 
1436 		poll_wait(fp, &pd->port_wait, pt);
1437 	}
1438 
1439 	return pollflag;
1440 }
1441 
1442 static unsigned int ipath_poll(struct file *fp,
1443 			       struct poll_table_struct *pt)
1444 {
1445 	struct ipath_portdata *pd;
1446 	unsigned pollflag;
1447 
1448 	pd = port_fp(fp);
1449 	if (!pd)
1450 		pollflag = 0;
1451 	else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
1452 		pollflag = ipath_poll_urgent(pd, fp, pt);
1453 	else
1454 		pollflag = ipath_poll_next(pd, fp, pt);
1455 
1456 	return pollflag;
1457 }
1458 
1459 static int ipath_supports_subports(int user_swmajor, int user_swminor)
1460 {
1461 	/* no subport implementation prior to software version 1.3 */
1462 	return (user_swmajor > 1) || (user_swminor >= 3);
1463 }
1464 
1465 static int ipath_compatible_subports(int user_swmajor, int user_swminor)
1466 {
1467 	/* this code is written long-hand for clarity */
1468 	if (IPATH_USER_SWMAJOR != user_swmajor) {
1469 		/* no promise of compatibility if major mismatch */
1470 		return 0;
1471 	}
1472 	if (IPATH_USER_SWMAJOR == 1) {
1473 		switch (IPATH_USER_SWMINOR) {
1474 		case 0:
1475 		case 1:
1476 		case 2:
1477 			/* no subport implementation so cannot be compatible */
1478 			return 0;
1479 		case 3:
1480 			/* 3 is only compatible with itself */
1481 			return user_swminor == 3;
1482 		default:
1483 			/* >= 4 are compatible (or are expected to be) */
1484 			return user_swminor >= 4;
1485 		}
1486 	}
1487 	/* make no promises yet for future major versions */
1488 	return 0;
1489 }
1490 
1491 static int init_subports(struct ipath_devdata *dd,
1492 			 struct ipath_portdata *pd,
1493 			 const struct ipath_user_info *uinfo)
1494 {
1495 	int ret = 0;
1496 	unsigned num_subports;
1497 	size_t size;
1498 
1499 	/*
1500 	 * If the user is requesting zero subports,
1501 	 * skip the subport allocation.
1502 	 */
1503 	if (uinfo->spu_subport_cnt <= 0)
1504 		goto bail;
1505 
1506 	/* Self-consistency check for ipath_compatible_subports() */
1507 	if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
1508 	    !ipath_compatible_subports(IPATH_USER_SWMAJOR,
1509 				       IPATH_USER_SWMINOR)) {
1510 		dev_info(&dd->pcidev->dev,
1511 			 "Inconsistent ipath_compatible_subports()\n");
1512 		goto bail;
1513 	}
1514 
1515 	/* Check for subport compatibility */
1516 	if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
1517 				       uinfo->spu_userversion & 0xffff)) {
1518 		dev_info(&dd->pcidev->dev,
1519 			 "Mismatched user version (%d.%d) and driver "
1520 			 "version (%d.%d) while port sharing. Ensure "
1521 			 "that driver and library are from the same "
1522 			 "release.\n",
1523 			 (int) (uinfo->spu_userversion >> 16),
1524 			 (int) (uinfo->spu_userversion & 0xffff),
1525 			 IPATH_USER_SWMAJOR,
1526 			 IPATH_USER_SWMINOR);
1527 		goto bail;
1528 	}
1529 	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
1530 		ret = -EINVAL;
1531 		goto bail;
1532 	}
1533 
1534 	num_subports = uinfo->spu_subport_cnt;
1535 	pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
1536 	if (!pd->subport_uregbase) {
1537 		ret = -ENOMEM;
1538 		goto bail;
1539 	}
1540 	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
1541 	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1542 		     sizeof(u32), PAGE_SIZE) * num_subports;
1543 	pd->subport_rcvhdr_base = vzalloc(size);
1544 	if (!pd->subport_rcvhdr_base) {
1545 		ret = -ENOMEM;
1546 		goto bail_ureg;
1547 	}
1548 
1549 	pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
1550 					pd->port_rcvegrbuf_size *
1551 					num_subports);
1552 	if (!pd->subport_rcvegrbuf) {
1553 		ret = -ENOMEM;
1554 		goto bail_rhdr;
1555 	}
1556 
1557 	pd->port_subport_cnt = uinfo->spu_subport_cnt;
1558 	pd->port_subport_id = uinfo->spu_subport_id;
1559 	pd->active_slaves = 1;
1560 	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1561 	goto bail;
1562 
1563 bail_rhdr:
1564 	vfree(pd->subport_rcvhdr_base);
1565 bail_ureg:
1566 	vfree(pd->subport_uregbase);
1567 	pd->subport_uregbase = NULL;
1568 bail:
1569 	return ret;
1570 }
1571 
1572 static int try_alloc_port(struct ipath_devdata *dd, int port,
1573 			  struct file *fp,
1574 			  const struct ipath_user_info *uinfo)
1575 {
1576 	struct ipath_portdata *pd;
1577 	int ret;
1578 
1579 	if (!(pd = dd->ipath_pd[port])) {
1580 		void *ptmp;
1581 
1582 		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
1583 
1584 		/*
1585 		 * Allocate memory for use in ipath_tid_update() just once
1586 		 * at open, not per call.  Reduces cost of expected send
1587 		 * setup.
1588 		 */
1589 		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
1590 			       dd->ipath_rcvtidcnt * sizeof(struct page **),
1591 			       GFP_KERNEL);
1592 		if (!pd || !ptmp) {
1593 			ipath_dev_err(dd, "Unable to allocate portdata "
1594 				      "memory, failing open\n");
1595 			ret = -ENOMEM;
1596 			kfree(pd);
1597 			kfree(ptmp);
1598 			goto bail;
1599 		}
1600 		dd->ipath_pd[port] = pd;
1601 		dd->ipath_pd[port]->port_port = port;
1602 		dd->ipath_pd[port]->port_dd = dd;
1603 		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
1604 		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
1605 	}
1606 	if (!pd->port_cnt) {
1607 		pd->userversion = uinfo->spu_userversion;
1608 		init_user_egr_sizes(pd);
1609 		if ((ret = init_subports(dd, pd, uinfo)) != 0)
1610 			goto bail;
1611 		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
1612 			   current->comm, current->pid, dd->ipath_unit,
1613 			   port);
1614 		pd->port_cnt = 1;
1615 		port_fp(fp) = pd;
1616 		pd->port_pid = get_pid(task_pid(current));
1617 		strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
1618 		ipath_stats.sps_ports++;
1619 		ret = 0;
1620 	} else
1621 		ret = -EBUSY;
1622 
1623 bail:
1624 	return ret;
1625 }
1626 
1627 static inline int usable(struct ipath_devdata *dd)
1628 {
1629 	return dd &&
1630 		(dd->ipath_flags & IPATH_PRESENT) &&
1631 		dd->ipath_kregbase &&
1632 		dd->ipath_lid &&
1633 		!(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
1634 				     | IPATH_LINKUNK));
1635 }
1636 
1637 static int find_free_port(int unit, struct file *fp,
1638 			  const struct ipath_user_info *uinfo)
1639 {
1640 	struct ipath_devdata *dd = ipath_lookup(unit);
1641 	int ret, i;
1642 
1643 	if (!dd) {
1644 		ret = -ENODEV;
1645 		goto bail;
1646 	}
1647 
1648 	if (!usable(dd)) {
1649 		ret = -ENETDOWN;
1650 		goto bail;
1651 	}
1652 
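	/* port 0 is reserved for the kernel; user ports start at 1 */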
1653 	for (i = 1; i < dd->ipath_cfgports; i++) {
1654 		ret = try_alloc_port(dd, i, fp, uinfo);
1655 		if (ret != -EBUSY)
1656 			goto bail;
1657 	}
1658 	ret = -EBUSY;
1659 
1660 bail:
1661 	return ret;
1662 }
1663 
1664 static int find_best_unit(struct file *fp,
1665 			  const struct ipath_user_info *uinfo)
1666 {
1667 	int ret = 0, i, prefunit = -1, devmax;
1668 	int maxofallports, npresent, nup;
1669 	int ndev;
1670 
1671 	devmax = ipath_count_units(&npresent, &nup, &maxofallports);
1672 
1673 	/*
1674 	 * This code is present to allow a knowledgeable person to
1675 	 * specify the layout of processes to processors before opening
1676 	 * this driver, and then we'll assign the process to the "closest"
1677 	 * InfiniPath chip to that processor (we assume reasonable connectivity,
1678 	 * for now).  This code assumes that if affinity has been set
1679 	 * before this point, at most one cpu is set; for now this
1680 	 * is reasonable.  We check both cpumask_empty() and cpumask_full(),
1681 	 * in case some kernel variant sets none of the bits when no
1682 	 * affinity is set.  2.6.11 and 2.6.12 kernels have all present
1683 	 * cpus set.  Some day we'll have to fix it up further to handle
1684 	 * a cpu subset.  This algorithm fails for two HT chips connected
1685 	 * in tunnel fashion.  Eventually this needs real topology
1686 	 * information.  There may be some issues with dual core numbering
1687 	 * as well.  This needs more work prior to release.
1688 	 */
1689 	if (!cpumask_empty(tsk_cpus_allowed(current)) &&
1690 	    !cpumask_full(tsk_cpus_allowed(current))) {
1691 		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
1692 		get_online_cpus();
1693 		for_each_online_cpu(i)
1694 			if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
1695 				ipath_cdbg(PROC, "%s[%u] affinity set for "
1696 					   "cpu %d/%d\n", current->comm,
1697 					   current->pid, i, ncpus);
1698 				curcpu = i;
1699 				nset++;
1700 			}
1701 		put_online_cpus();
1702 		if (curcpu != -1 && nset != ncpus) {
1703 			if (npresent) {
1704 				prefunit = curcpu / (ncpus / npresent);
1705 				ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
1706 					  "%d cpus/chip, select unit %d\n",
1707 					  current->comm, current->pid,
1708 					  npresent, ncpus, ncpus / npresent,
1709 					  prefunit);
1710 			}
1711 		}
1712 	}
1713 
1714 	/*
1715 	 * user ports start at 1, kernel port is 0
1716 	 * For now, we do round-robin access across all chips
1717 	 */
1718 
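	/* if affinity selected a preferred unit, limit the search to it */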
1719 	if (prefunit != -1)
1720 		devmax = prefunit + 1;
1721 recheck:
1722 	for (i = 1; i < maxofallports; i++) {
1723 		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
1724 		     ndev++) {
1725 			struct ipath_devdata *dd = ipath_lookup(ndev);
1726 
1727 			if (!usable(dd))
1728 				continue; /* can't use this unit */
1729 			if (i >= dd->ipath_cfgports)
1730 				/*
1731 				 * Maxed out on users of this unit. Try
1732 				 * next.
1733 				 */
1734 				continue;
1735 			ret = try_alloc_port(dd, i, fp, uinfo);
1736 			if (!ret)
1737 				goto done;
1738 		}
1739 	}
1740 
1741 	if (npresent) {
1742 		if (nup == 0) {
1743 			ret = -ENETDOWN;
1744 			ipath_dbg("No ports available (none initialized "
1745 				  "and ready)\n");
1746 		} else {
1747 			if (prefunit > 0) {
1748 				/* if started above 0, retry from 0 */
1749 				ipath_cdbg(PROC,
1750 					   "%s[%u] no ports on prefunit "
1751 					   "%d, clear and re-check\n",
1752 					   current->comm, current->pid,
1753 					   prefunit);
1754 				devmax = ipath_count_units(NULL, NULL,
1755 							   NULL);
1756 				prefunit = -1;
1757 				goto recheck;
1758 			}
1759 			ret = -EBUSY;
1760 			ipath_dbg("No ports available\n");
1761 		}
1762 	} else {
1763 		ret = -ENXIO;
1764 		ipath_dbg("No boards found\n");
1765 	}
1766 
1767 done:
1768 	return ret;
1769 }
1770 
1771 static int find_shared_port(struct file *fp,
1772 			    const struct ipath_user_info *uinfo)
1773 {
1774 	int devmax, ndev, i;
1775 	int ret = 0;
1776 
1777 	devmax = ipath_count_units(NULL, NULL, NULL);
1778 
1779 	for (ndev = 0; ndev < devmax; ndev++) {
1780 		struct ipath_devdata *dd = ipath_lookup(ndev);
1781 
1782 		if (!usable(dd))
1783 			continue;
1784 		for (i = 1; i < dd->ipath_cfgports; i++) {
1785 			struct ipath_portdata *pd = dd->ipath_pd[i];
1786 
1787 			/* Skip ports which are not yet open */
1788 			if (!pd || !pd->port_cnt)
1789 				continue;
1790 			/* Skip port if it doesn't match the requested one */
1791 			if (pd->port_subport_id != uinfo->spu_subport_id)
1792 				continue;
1793 			/* Verify the sharing process matches the master */
1794 			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
1795 			    pd->userversion != uinfo->spu_userversion ||
1796 			    pd->port_cnt >= pd->port_subport_cnt) {
1797 				ret = -EINVAL;
1798 				goto done;
1799 			}
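			/* matching master found; attach as the next slave subport */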
1800 			port_fp(fp) = pd;
1801 			subport_fp(fp) = pd->port_cnt++;
1802 			pd->port_subpid[subport_fp(fp)] =
1803 				get_pid(task_pid(current));
1804 			tidcursor_fp(fp) = 0;
1805 			pd->active_slaves |= 1 << subport_fp(fp);
1806 			ipath_cdbg(PROC,
1807 				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
1808 				   current->comm, current->pid,
1809 				   subport_fp(fp),
1810 				   pd->port_comm, pid_nr(pd->port_pid),
1811 				   dd->ipath_unit, pd->port_port);
1812 			ret = 1;
1813 			goto done;
1814 		}
1815 	}
1816 
1817 done:
1818 	return ret;
1819 }
1820 
1821 static int ipath_open(struct inode *in, struct file *fp)
1822 {
1823 	/* The real work is performed later in ipath_assign_port() */
1824 	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
1825 	return fp->private_data ? 0 : -ENOMEM;
1826 }
1827 
1828 /* Get port early, so can set affinity prior to memory allocation */
1829 static int ipath_assign_port(struct file *fp,
1830 			      const struct ipath_user_info *uinfo)
1831 {
1832 	int ret;
1833 	int i_minor;
1834 	unsigned swmajor, swminor;
1835 
1836 	/* Check to be sure we haven't already initialized this file */
1837 	if (port_fp(fp)) {
1838 		ret = -EINVAL;
1839 		goto done;
1840 	}
1841 
1842 	/* for now, if major version is different, bail */
1843 	swmajor = uinfo->spu_userversion >> 16;
1844 	if (swmajor != IPATH_USER_SWMAJOR) {
1845 		ipath_dbg("User major version %d not same as driver "
1846 			  "major %d\n", uinfo->spu_userversion >> 16,
1847 			  IPATH_USER_SWMAJOR);
1848 		ret = -ENODEV;
1849 		goto done;
1850 	}
1851 
1852 	swminor = uinfo->spu_userversion & 0xffff;
1853 	if (swminor != IPATH_USER_SWMINOR)
1854 		ipath_dbg("User minor version %d not same as driver "
1855 			  "minor %d\n", swminor, IPATH_USER_SWMINOR);
1856 
1857 	mutex_lock(&ipath_mutex);
1858 
1859 	if (ipath_compatible_subports(swmajor, swminor) &&
1860 	    uinfo->spu_subport_cnt &&
1861 	    (ret = find_shared_port(fp, uinfo))) {
1862 		if (ret > 0)
1863 			ret = 0;
1864 		goto done_chk_sdma;
1865 	}
1866 
1867 	i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
1868 	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
1869 		   (long)fp->f_path.dentry->d_inode->i_rdev, i_minor);
1870 
1871 	if (i_minor)
1872 		ret = find_free_port(i_minor - 1, fp, uinfo);
1873 	else
1874 		ret = find_best_unit(fp, uinfo);
1875 
1876 done_chk_sdma:
1877 	if (!ret) {
1878 		struct ipath_filedata *fd = fp->private_data;
1879 		const struct ipath_portdata *pd = fd->pd;
1880 		const struct ipath_devdata *dd = pd->port_dd;
1881 
1882 		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
1883 						      dd->ipath_unit,
1884 						      pd->port_port,
1885 						      fd->subport);
1886 
1887 		if (!fd->pq)
1888 			ret = -ENOMEM;
1889 	}
1890 
1891 	mutex_unlock(&ipath_mutex);
1892 
1893 done:
1894 	return ret;
1895 }
1896 
1897 
1898 static int ipath_do_user_init(struct file *fp,
1899 			      const struct ipath_user_info *uinfo)
1900 {
1901 	int ret;
1902 	struct ipath_portdata *pd = port_fp(fp);
1903 	struct ipath_devdata *dd;
1904 	u32 head32;
1905 
1906 	/* Subports don't need to initialize anything since master did it. */
1907 	if (subport_fp(fp)) {
1908 		ret = wait_event_interruptible(pd->port_wait,
1909 			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
1910 		goto done;
1911 	}
1912 
1913 	dd = pd->port_dd;
1914 
1915 	if (uinfo->spu_rcvhdrsize) {
1916 		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
1917 		if (ret)
1918 			goto done;
1919 	}
1920 
1921 	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
1922 
1923 	/* some ports may get extra buffers, calculate that here */
1924 	if (pd->port_port <= dd->ipath_ports_extrabuf)
1925 		pd->port_piocnt = dd->ipath_pbufsport + 1;
1926 	else
1927 		pd->port_piocnt = dd->ipath_pbufsport;
1928 
1929 	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
1930 	if (pd->port_port <= dd->ipath_ports_extrabuf)
1931 		pd->port_pio_base = (dd->ipath_pbufsport + 1)
1932 			* (pd->port_port - 1);
1933 	else
1934 		pd->port_pio_base = dd->ipath_ports_extrabuf +
1935 			dd->ipath_pbufsport * (pd->port_port - 1);
1936 	pd->port_piobufs = dd->ipath_piobufbase +
1937 		pd->port_pio_base * dd->ipath_palign;
1938 	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
1939 		" first pio %u\n", pd->port_port, pd->port_piobufs,
1940 		pd->port_piocnt, pd->port_pio_base);
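	/* these PIO buffers now belong to the user port, not the kernel */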
1941 	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
1942 
1943 	/*
1944 	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
1945 	 * array for the time being.  If pd->port_port > chip-supported,
1946 	 * we will someday need extra logic here to handle the overflow
1947 	 * through port 0.
1948 	 */
1949 	ret = ipath_create_rcvhdrq(dd, pd);
1950 	if (!ret)
1951 		ret = ipath_create_user_egr(pd);
1952 	if (ret)
1953 		goto done;
1954 
1955 	/*
1956 	 * set the eager head register for this port to the current values
1957 	 * of the tail pointers, since we don't know if they were
1958 	 * updated on last use of the port.
1959 	 */
1960 	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
1961 	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
1962 	pd->port_lastrcvhdrqtail = -1;
1963 	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
1964 		pd->port_port, head32);
1965 	pd->port_tidcursor = 0;	/* start at beginning after open */
1966 
1967 	/* initialize poll variables... */
1968 	pd->port_urgent = 0;
1969 	pd->port_urgent_poll = 0;
1970 	pd->port_hdrqfull_poll = pd->port_hdrqfull;
1971 
1972 	/*
1973 	 * Now enable the port for receive.
1974 	 * Chips that are set to DMA the tail register to memory do so
1975 	 * when it changes and when the update bit transitions from 0 to
1976 	 * 1, so for those chips we turn tail updates off and then back
1977 	 * on.  This will (very briefly) affect any other open ports,
1978 	 * but the duration is very short, and therefore isn't an issue.
1979 	 * We explicitly set the in-memory tail copy to 0 beforehand, so
1980 	 * we don't have to wait to be sure the DMA update has happened
1981 	 * (the chip resets head/tail to 0 on the transition to enable).
1982 	 */
1983 	set_bit(dd->ipath_r_portenable_shift + pd->port_port,
1984 		&dd->ipath_rcvctrl);
1985 	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
1986 		if (pd->port_rcvhdrtail_kvaddr)
1987 			ipath_clear_rcvhdrtail(pd);
1988 		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1989 			dd->ipath_rcvctrl &
1990 			~(1ULL << dd->ipath_r_tailupd_shift));
1991 	}
1992 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1993 			 dd->ipath_rcvctrl);
1994 	/* Notify any waiting slaves */
1995 	if (pd->port_subport_cnt) {
1996 		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1997 		wake_up(&pd->port_wait);
1998 	}
1999 done:
2000 	return ret;
2001 }
2002 
2003 /**
2004  * unlock_expected_tids - unlock any expected TID entries the port still had in use
2005  * @pd: port
2006  *
2007  * We don't actually update the chip here, because we do a bulk update
2008  * below, using ipath_f_clear_tids.
2009  */
2010 static void unlock_expected_tids(struct ipath_portdata *pd)
2011 {
2012 	struct ipath_devdata *dd = pd->port_dd;
2013 	int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
2014 	int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
2015 
2016 	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
2017 		   pd->port_port);
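	/* walk this port's block of expected-TID entries and release any pages still pinned */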
2018 	for (i = port_tidbase; i < maxtid; i++) {
2019 		struct page *ps = dd->ipath_pageshadow[i];
2020 
2021 		if (!ps)
2022 			continue;
2023 
2024 		dd->ipath_pageshadow[i] = NULL;
2025 		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
2026 			PAGE_SIZE, PCI_DMA_FROMDEVICE);
2027 		ipath_release_user_pages_on_close(&ps, 1);
2028 		cnt++;
2029 		ipath_stats.sps_pageunlocks++;
2030 	}
2031 	if (cnt)
2032 		ipath_cdbg(VERBOSE, "Port %u unlocked %u expTID entries\n",
2033 			   pd->port_port, cnt);
2034 
2035 	if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
2036 		ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
2037 			   (unsigned long long) ipath_stats.sps_pagelocks,
2038 			   (unsigned long long)
2039 			   ipath_stats.sps_pageunlocks);
2040 }
2041 
2042 static int ipath_close(struct inode *in, struct file *fp)
2043 {
2044 	int ret = 0;
2045 	struct ipath_filedata *fd;
2046 	struct ipath_portdata *pd;
2047 	struct ipath_devdata *dd;
2048 	unsigned long flags;
2049 	unsigned port;
2050 	struct pid *pid;
2051 
2052 	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
2053 		   (long)in->i_rdev, fp->private_data);
2054 
2055 	mutex_lock(&ipath_mutex);
2056 
2057 	fd = fp->private_data;
2058 	fp->private_data = NULL;
2059 	pd = fd->pd;
2060 	if (!pd) {
2061 		mutex_unlock(&ipath_mutex);
2062 		goto bail;
2063 	}
2064 
2065 	dd = pd->port_dd;
2066 
2067 	/* drain user sdma queue */
2068 	ipath_user_sdma_queue_drain(dd, fd->pq);
2069 	ipath_user_sdma_queue_destroy(fd->pq);
2070 
2071 	if (--pd->port_cnt) {
2072 		/*
2073 		 * XXX If the master closes the port before the slave(s),
2074 		 * revoke the mmap for the eager receive queue so
2075 		 * the slave(s) don't wait for receive data forever.
2076 		 */
2077 		pd->active_slaves &= ~(1 << fd->subport);
2078 		put_pid(pd->port_subpid[fd->subport]);
2079 		pd->port_subpid[fd->subport] = NULL;
2080 		mutex_unlock(&ipath_mutex);
2081 		goto bail;
2082 	}
2083 	/* drop the port from the device early; no interrupt users after this */
2084 	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
2085 	port = pd->port_port;
2086 	dd->ipath_pd[port] = NULL;
2087 	pid = pd->port_pid;
2088 	pd->port_pid = NULL;
2089 	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2090 
2091 	if (pd->port_rcvwait_to || pd->port_piowait_to
2092 	    || pd->port_rcvnowait || pd->port_pionowait) {
2093 		ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeouts; "
2094 			   "%u rcv, %u pio already\n",
2095 			   pd->port_port, pd->port_rcvwait_to,
2096 			   pd->port_piowait_to, pd->port_rcvnowait,
2097 			   pd->port_pionowait);
2098 		pd->port_rcvwait_to = pd->port_piowait_to =
2099 			pd->port_rcvnowait = pd->port_pionowait = 0;
2100 	}
2101 	if (pd->port_flag) {
2102 		ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
2103 			  pd->port_port, pd->port_flag);
2104 		pd->port_flag = 0;
2105 	}
2106 
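	/* only touch the chip if its registers are still mapped */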
2107 	if (dd->ipath_kregbase) {
2108 		/* atomically clear the receive-enable and intr-avail bits for this port */
2109 		clear_bit(dd->ipath_r_portenable_shift + port,
2110 			  &dd->ipath_rcvctrl);
2111 		clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
2112 			  &dd->ipath_rcvctrl);
2113 		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
2114 			dd->ipath_rcvctrl);
2115 		/* and read back from chip to be sure that nothing
2116 		 * else is in flight when we do the rest */
2117 		(void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2118 
2119 		/* clean up the pkeys for this port user */
2120 		ipath_clean_part_key(pd, dd);
2121 		/*
2122 		 * be paranoid, and never write 0's to these, just use an
2123 		 * unused part of the port 0 tail page.  Of course,
2124 		 * rcvhdraddr points to a large chunk of memory, so this
2125 		 * could still trash things, but at least it won't trash
2126 		 * page 0, and by disabling the port, it should stop "soon",
2127 		 * even if a packet or two is already in flight after we
2128 		 * disabled the port.
2129 		 */
2130 		ipath_write_kreg_port(dd,
2131 			dd->ipath_kregs->kr_rcvhdrtailaddr, port,
2132 			dd->ipath_dummy_hdrq_phys);
2133 		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
2134 			pd->port_port, dd->ipath_dummy_hdrq_phys);
2135 
2136 		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
2137 		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
2138 			pd->port_piocnt, 1);
2139 
2140 		dd->ipath_f_clear_tids(dd, pd->port_port);
2141 
2142 		if (dd->ipath_pageshadow)
2143 			unlock_expected_tids(pd);
2144 		ipath_stats.sps_ports--;
2145 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
2146 			   pd->port_comm, pid_nr(pid),
2147 			   dd->ipath_unit, port);
2148 	}
2149 
2150 	put_pid(pid);
2151 	mutex_unlock(&ipath_mutex);
2152 	ipath_free_pddata(dd, pd); /* after releasing the mutex */
2153 
2154 bail:
2155 	kfree(fd);
2156 	return ret;
2157 }
2158 
2159 static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
2160 			   struct ipath_port_info __user *uinfo)
2161 {
2162 	struct ipath_port_info info;
2163 	int nup;
2164 	int ret;
2165 	size_t sz;
2166 
2167 	(void) ipath_count_units(NULL, &nup, NULL);
2168 	info.num_active = nup;
2169 	info.unit = pd->port_dd->ipath_unit;
2170 	info.port = pd->port_port;
2171 	info.subport = subport;
2172 	/* Don't return new fields if old library opened the port. */
2173 	if (ipath_supports_subports(pd->userversion >> 16,
2174 				    pd->userversion & 0xffff)) {
2175 		/* Number of user ports available for this device. */
2176 		info.num_ports = pd->port_dd->ipath_cfgports - 1;
2177 		info.num_subports = pd->port_subport_cnt;
2178 		sz = sizeof(info);
2179 	} else
2180 		sz = sizeof(info) - 2 * sizeof(u16);
2181 
2182 	if (copy_to_user(uinfo, &info, sz)) {
2183 		ret = -EFAULT;
2184 		goto bail;
2185 	}
2186 	ret = 0;
2187 
2188 bail:
2189 	return ret;
2190 }
2191 
2192 static int ipath_get_slave_info(struct ipath_portdata *pd,
2193 				void __user *slave_mask_addr)
2194 {
2195 	int ret = 0;
2196 
2197 	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
2198 		ret = -EFAULT;
2199 	return ret;
2200 }
2201 
2202 static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
2203 				   u32 __user *inflightp)
2204 {
2205 	const u32 val = ipath_user_sdma_inflight_counter(pq);
2206 
2207 	if (put_user(val, inflightp))
2208 		return -EFAULT;
2209 
2210 	return 0;
2211 }
2212 
2213 static int ipath_sdma_get_complete(struct ipath_devdata *dd,
2214 				   struct ipath_user_sdma_queue *pq,
2215 				   u32 __user *completep)
2216 {
2217 	u32 val;
2218 	int err;
2219 
2220 	err = ipath_user_sdma_make_progress(dd, pq);
2221 	if (err < 0)
2222 		return err;
2223 
2224 	val = ipath_user_sdma_complete_counter(pq);
2225 	if (put_user(val, completep))
2226 		return -EFAULT;
2227 
2228 	return 0;
2229 }
2230 
2231 static ssize_t ipath_write(struct file *fp, const char __user *data,
2232 			   size_t count, loff_t *off)
2233 {
2234 	const struct ipath_cmd __user *ucmd;
2235 	struct ipath_portdata *pd;
2236 	const void __user *src;
2237 	size_t consumed, copy;
2238 	struct ipath_cmd cmd;
2239 	ssize_t ret = 0;
2240 	void *dest;
2241 
2242 	if (count < sizeof(cmd.type)) {
2243 		ret = -EINVAL;
2244 		goto bail;
2245 	}
2246 
2247 	ucmd = (const struct ipath_cmd __user *) data;
2248 
2249 	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
2250 		ret = -EFAULT;
2251 		goto bail;
2252 	}
2253 
2254 	consumed = sizeof(cmd.type);
2255 
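	/* first pass: find the type-specific argument and how much of it to copy in */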
2256 	switch (cmd.type) {
2257 	case IPATH_CMD_ASSIGN_PORT:
2258 	case __IPATH_CMD_USER_INIT:
2259 	case IPATH_CMD_USER_INIT:
2260 		copy = sizeof(cmd.cmd.user_info);
2261 		dest = &cmd.cmd.user_info;
2262 		src = &ucmd->cmd.user_info;
2263 		break;
2264 	case IPATH_CMD_RECV_CTRL:
2265 		copy = sizeof(cmd.cmd.recv_ctrl);
2266 		dest = &cmd.cmd.recv_ctrl;
2267 		src = &ucmd->cmd.recv_ctrl;
2268 		break;
2269 	case IPATH_CMD_PORT_INFO:
2270 		copy = sizeof(cmd.cmd.port_info);
2271 		dest = &cmd.cmd.port_info;
2272 		src = &ucmd->cmd.port_info;
2273 		break;
2274 	case IPATH_CMD_TID_UPDATE:
2275 	case IPATH_CMD_TID_FREE:
2276 		copy = sizeof(cmd.cmd.tid_info);
2277 		dest = &cmd.cmd.tid_info;
2278 		src = &ucmd->cmd.tid_info;
2279 		break;
2280 	case IPATH_CMD_SET_PART_KEY:
2281 		copy = sizeof(cmd.cmd.part_key);
2282 		dest = &cmd.cmd.part_key;
2283 		src = &ucmd->cmd.part_key;
2284 		break;
2285 	case __IPATH_CMD_SLAVE_INFO:
2286 		copy = sizeof(cmd.cmd.slave_mask_addr);
2287 		dest = &cmd.cmd.slave_mask_addr;
2288 		src = &ucmd->cmd.slave_mask_addr;
2289 		break;
2290 	case IPATH_CMD_PIOAVAILUPD:	/* force an update of the PIOAvail register */
2291 		copy = 0;
2292 		src = NULL;
2293 		dest = NULL;
2294 		break;
2295 	case IPATH_CMD_POLL_TYPE:
2296 		copy = sizeof(cmd.cmd.poll_type);
2297 		dest = &cmd.cmd.poll_type;
2298 		src = &ucmd->cmd.poll_type;
2299 		break;
2300 	case IPATH_CMD_ARMLAUNCH_CTRL:
2301 		copy = sizeof(cmd.cmd.armlaunch_ctrl);
2302 		dest = &cmd.cmd.armlaunch_ctrl;
2303 		src = &ucmd->cmd.armlaunch_ctrl;
2304 		break;
2305 	case IPATH_CMD_SDMA_INFLIGHT:
2306 		copy = sizeof(cmd.cmd.sdma_inflight);
2307 		dest = &cmd.cmd.sdma_inflight;
2308 		src = &ucmd->cmd.sdma_inflight;
2309 		break;
2310 	case IPATH_CMD_SDMA_COMPLETE:
2311 		copy = sizeof(cmd.cmd.sdma_complete);
2312 		dest = &cmd.cmd.sdma_complete;
2313 		src = &ucmd->cmd.sdma_complete;
2314 		break;
2315 	default:
2316 		ret = -EINVAL;
2317 		goto bail;
2318 	}
2319 
2320 	if (copy) {
2321 		if ((count - consumed) < copy) {
2322 			ret = -EINVAL;
2323 			goto bail;
2324 		}
2325 
2326 		if (copy_from_user(dest, src, copy)) {
2327 			ret = -EFAULT;
2328 			goto bail;
2329 		}
2330 
2331 		consumed += copy;
2332 	}
2333 
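	/* all commands except port assignment / user init require an assigned port */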
2334 	pd = port_fp(fp);
2335 	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
2336 		cmd.type != IPATH_CMD_ASSIGN_PORT) {
2337 		ret = -EINVAL;
2338 		goto bail;
2339 	}
2340 
2341 	switch (cmd.type) {
2342 	case IPATH_CMD_ASSIGN_PORT:
2343 		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
2344 		if (ret)
2345 			goto bail;
2346 		break;
2347 	case __IPATH_CMD_USER_INIT:
2348 		/* backwards compatibility, get port first */
2349 		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
2350 		if (ret)
2351 			goto bail;
2352 		/* and fall through to current version. */
2353 	case IPATH_CMD_USER_INIT:
2354 		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
2355 		if (ret)
2356 			goto bail;
2357 		ret = ipath_get_base_info(
2358 			fp, (void __user *) (unsigned long)
2359 			cmd.cmd.user_info.spu_base_info,
2360 			cmd.cmd.user_info.spu_base_info_size);
2361 		break;
2362 	case IPATH_CMD_RECV_CTRL:
2363 		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
2364 		break;
2365 	case IPATH_CMD_PORT_INFO:
2366 		ret = ipath_port_info(pd, subport_fp(fp),
2367 				      (struct ipath_port_info __user *)
2368 				      (unsigned long) cmd.cmd.port_info);
2369 		break;
2370 	case IPATH_CMD_TID_UPDATE:
2371 		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
2372 		break;
2373 	case IPATH_CMD_TID_FREE:
2374 		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
2375 		break;
2376 	case IPATH_CMD_SET_PART_KEY:
2377 		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
2378 		break;
2379 	case __IPATH_CMD_SLAVE_INFO:
2380 		ret = ipath_get_slave_info(pd,
2381 					   (void __user *) (unsigned long)
2382 					   cmd.cmd.slave_mask_addr);
2383 		break;
2384 	case IPATH_CMD_PIOAVAILUPD:
2385 		ipath_force_pio_avail_update(pd->port_dd);
2386 		break;
2387 	case IPATH_CMD_POLL_TYPE:
2388 		pd->poll_type = cmd.cmd.poll_type;
2389 		break;
2390 	case IPATH_CMD_ARMLAUNCH_CTRL:
2391 		if (cmd.cmd.armlaunch_ctrl)
2392 			ipath_enable_armlaunch(pd->port_dd);
2393 		else
2394 			ipath_disable_armlaunch(pd->port_dd);
2395 		break;
2396 	case IPATH_CMD_SDMA_INFLIGHT:
2397 		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
2398 					      (u32 __user *) (unsigned long)
2399 					      cmd.cmd.sdma_inflight);
2400 		break;
2401 	case IPATH_CMD_SDMA_COMPLETE:
2402 		ret = ipath_sdma_get_complete(pd->port_dd,
2403 					      user_sdma_queue_fp(fp),
2404 					      (u32 __user *) (unsigned long)
2405 					      cmd.cmd.sdma_complete);
2406 		break;
2407 	}
2408 
2409 	if (ret >= 0)
2410 		ret = consumed;
2411 
2412 bail:
2413 	return ret;
2414 }
2415 
2416 static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
2417 			    unsigned long dim, loff_t off)
2418 {
2419 	struct file *filp = iocb->ki_filp;
2420 	struct ipath_filedata *fp = filp->private_data;
2421 	struct ipath_portdata *pd = port_fp(filp);
2422 	struct ipath_user_sdma_queue *pq = fp->pq;
2423 
2424 	if (!dim)
2425 		return -EINVAL;
2426 
2427 	return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
2428 }
2429 
2430 static struct class *ipath_class;
2431 
2432 static int init_cdev(int minor, char *name, const struct file_operations *fops,
2433 		     struct cdev **cdevp, struct device **devp)
2434 {
2435 	const dev_t dev = MKDEV(IPATH_MAJOR, minor);
2436 	struct cdev *cdev = NULL;
2437 	struct device *device = NULL;
2438 	int ret;
2439 
2440 	cdev = cdev_alloc();
2441 	if (!cdev) {
2442 		printk(KERN_ERR IPATH_DRV_NAME
2443 		       ": Could not allocate cdev for minor %d, %s\n",
2444 		       minor, name);
2445 		ret = -ENOMEM;
2446 		goto done;
2447 	}
2448 
2449 	cdev->owner = THIS_MODULE;
2450 	cdev->ops = fops;
2451 	kobject_set_name(&cdev->kobj, name);
2452 
2453 	ret = cdev_add(cdev, dev, 1);
2454 	if (ret < 0) {
2455 		printk(KERN_ERR IPATH_DRV_NAME
2456 		       ": Could not add cdev for minor %d, %s (err %d)\n",
2457 		       minor, name, -ret);
2458 		goto err_cdev;
2459 	}
2460 
2461 	device = device_create(ipath_class, NULL, dev, NULL, name);
2462 
2463 	if (IS_ERR(device)) {
2464 		ret = PTR_ERR(device);
2465 		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
2466 		       "device for minor %d, %s (err %d)\n",
2467 		       minor, name, -ret);
2468 		goto err_cdev;
2469 	}
2470 
2471 	goto done;
2472 
2473 err_cdev:
2474 	cdev_del(cdev);
2475 	cdev = NULL;
2476 
2477 done:
2478 	if (ret >= 0) {
2479 		*cdevp = cdev;
2480 		*devp = device;
2481 	} else {
2482 		*cdevp = NULL;
2483 		*devp = NULL;
2484 	}
2485 
2486 	return ret;
2487 }
2488 
2489 int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
2490 		    struct cdev **cdevp, struct device **devp)
2491 {
2492 	return init_cdev(minor, name, fops, cdevp, devp);
2493 }
2494 
2495 static void cleanup_cdev(struct cdev **cdevp,
2496 			 struct device **devp)
2497 {
2498 	struct device *dev = *devp;
2499 
2500 	if (dev) {
2501 		device_unregister(dev);
2502 		*devp = NULL;
2503 	}
2504 
2505 	if (*cdevp) {
2506 		cdev_del(*cdevp);
2507 		*cdevp = NULL;
2508 	}
2509 }
2510 
2511 void ipath_cdev_cleanup(struct cdev **cdevp,
2512 			struct device **devp)
2513 {
2514 	cleanup_cdev(cdevp, devp);
2515 }
2516 
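/* minor 0 is the "wildcard" device; per-unit devices use minor (unit + 1) */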
2517 static struct cdev *wildcard_cdev;
2518 static struct device *wildcard_dev;
2519 
2520 static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
2521 
2522 static int user_init(void)
2523 {
2524 	int ret;
2525 
2526 	ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
2527 	if (ret < 0) {
2528 		printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
2529 		       "chrdev region (err %d)\n", -ret);
2530 		goto done;
2531 	}
2532 
2533 	ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
2534 
2535 	if (IS_ERR(ipath_class)) {
2536 		ret = PTR_ERR(ipath_class);
2537 		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
2538 		       "device class (err %d)\n", -ret);
2539 		goto bail;
2540 	}
2541 
2542 	goto done;
2543 bail:
2544 	unregister_chrdev_region(dev, IPATH_NMINORS);
2545 done:
2546 	return ret;
2547 }
2548 
2549 static void user_cleanup(void)
2550 {
2551 	if (ipath_class) {
2552 		class_destroy(ipath_class);
2553 		ipath_class = NULL;
2554 	}
2555 
2556 	unregister_chrdev_region(dev, IPATH_NMINORS);
2557 }
2558 
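/*
 * The first unit added sets up the chrdev region, class, and wildcard
 * device; the last unit removed tears them down again.
 */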
2559 static atomic_t user_count = ATOMIC_INIT(0);
2560 static atomic_t user_setup = ATOMIC_INIT(0);
2561 
2562 int ipath_user_add(struct ipath_devdata *dd)
2563 {
2564 	char name[10];
2565 	int ret;
2566 
2567 	if (atomic_inc_return(&user_count) == 1) {
2568 		ret = user_init();
2569 		if (ret < 0) {
2570 			ipath_dev_err(dd, "Unable to set up user support: "
2571 				      "error %d\n", -ret);
2572 			goto bail;
2573 		}
2574 		ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
2575 				&wildcard_dev);
2576 		if (ret < 0) {
2577 			ipath_dev_err(dd, "Could not create wildcard "
2578 				      "minor: error %d\n", -ret);
2579 			goto bail_user;
2580 		}
2581 
2582 		atomic_set(&user_setup, 1);
2583 	}
2584 
2585 	snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
2586 
2587 	ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
2588 			&dd->user_cdev, &dd->user_dev);
2589 	if (ret < 0)
2590 		ipath_dev_err(dd, "Could not create user minor %d, %s\n",
2591 			      dd->ipath_unit + 1, name);
2592 
2593 	goto bail;
2594 
2595 bail_user:
2596 	user_cleanup();
2597 bail:
2598 	return ret;
2599 }
2600 
2601 void ipath_user_remove(struct ipath_devdata *dd)
2602 {
2603 	cleanup_cdev(&dd->user_cdev, &dd->user_dev);
2604 
2605 	if (atomic_dec_return(&user_count) == 0) {
2606 		if (atomic_read(&user_setup) == 0)
2607 			goto bail;
2608 
2609 		cleanup_cdev(&wildcard_cdev, &wildcard_dev);
2610 		user_cleanup();
2611 
2612 		atomic_set(&user_setup, 0);
2613 	}
2614 bail:
2615 	return;
2616 }
2617