1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/highmem.h>
9 #include <linux/slab.h>
10 #include <linux/vmalloc.h>
11 #include <linux/splice.h>
12 #include <linux/compat.h>
13 #include <net/checksum.h>
14 #include <linux/scatterlist.h>
15 #include <linux/instrumented.h>
16
17 #define PIPE_PARANOIA /* for now */
18
19 /* covers iovec and kvec alike */
20 #define iterate_iovec(i, n, base, len, off, __p, STEP) { \
21 size_t off = 0; \
22 size_t skip = i->iov_offset; \
23 do { \
24 len = min(n, __p->iov_len - skip); \
25 if (likely(len)) { \
26 base = __p->iov_base + skip; \
27 len -= (STEP); \
28 off += len; \
29 skip += len; \
30 n -= len; \
31 if (skip < __p->iov_len) \
32 break; \
33 } \
34 __p++; \
35 skip = 0; \
36 } while (n); \
37 i->iov_offset = skip; \
38 n = off; \
39 }
40
41 #define iterate_bvec(i, n, base, len, off, p, STEP) { \
42 size_t off = 0; \
43 unsigned skip = i->iov_offset; \
44 while (n) { \
45 unsigned offset = p->bv_offset + skip; \
46 unsigned left; \
47 void *kaddr = kmap_local_page(p->bv_page + \
48 offset / PAGE_SIZE); \
49 base = kaddr + offset % PAGE_SIZE; \
50 len = min(min(n, (size_t)(p->bv_len - skip)), \
51 (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
52 left = (STEP); \
53 kunmap_local(kaddr); \
54 len -= left; \
55 off += len; \
56 skip += len; \
57 if (skip == p->bv_len) { \
58 skip = 0; \
59 p++; \
60 } \
61 n -= len; \
62 if (left) \
63 break; \
64 } \
65 i->iov_offset = skip; \
66 n = off; \
67 }
68
69 #define iterate_xarray(i, n, base, len, __off, STEP) { \
70 __label__ __out; \
71 size_t __off = 0; \
72 struct folio *folio; \
73 loff_t start = i->xarray_start + i->iov_offset; \
74 pgoff_t index = start / PAGE_SIZE; \
75 XA_STATE(xas, i->xarray, index); \
76 \
77 len = PAGE_SIZE - offset_in_page(start); \
78 rcu_read_lock(); \
79 xas_for_each(&xas, folio, ULONG_MAX) { \
80 unsigned left; \
81 size_t offset; \
82 if (xas_retry(&xas, folio)) \
83 continue; \
84 if (WARN_ON(xa_is_value(folio))) \
85 break; \
86 if (WARN_ON(folio_test_hugetlb(folio))) \
87 break; \
88 offset = offset_in_folio(folio, start + __off); \
89 while (offset < folio_size(folio)) { \
90 base = kmap_local_folio(folio, offset); \
91 len = min(n, len); \
92 left = (STEP); \
93 kunmap_local(base); \
94 len -= left; \
95 __off += len; \
96 n -= len; \
97 if (left || n == 0) \
98 goto __out; \
99 offset += len; \
100 len = PAGE_SIZE; \
101 } \
102 } \
103 __out: \
104 rcu_read_unlock(); \
105 i->iov_offset += __off; \
106 n = __off; \
107 }
108
109 #define __iterate_and_advance(i, n, base, len, off, I, K) { \
110 if (unlikely(i->count < n)) \
111 n = i->count; \
112 if (likely(n)) { \
113 if (likely(iter_is_iovec(i))) { \
114 const struct iovec *iov = i->iov; \
115 void __user *base; \
116 size_t len; \
117 iterate_iovec(i, n, base, len, off, \
118 iov, (I)) \
119 i->nr_segs -= iov - i->iov; \
120 i->iov = iov; \
121 } else if (iov_iter_is_bvec(i)) { \
122 const struct bio_vec *bvec = i->bvec; \
123 void *base; \
124 size_t len; \
125 iterate_bvec(i, n, base, len, off, \
126 bvec, (K)) \
127 i->nr_segs -= bvec - i->bvec; \
128 i->bvec = bvec; \
129 } else if (iov_iter_is_kvec(i)) { \
130 const struct kvec *kvec = i->kvec; \
131 void *base; \
132 size_t len; \
133 iterate_iovec(i, n, base, len, off, \
134 kvec, (K)) \
135 i->nr_segs -= kvec - i->kvec; \
136 i->kvec = kvec; \
137 } else if (iov_iter_is_xarray(i)) { \
138 void *base; \
139 size_t len; \
140 iterate_xarray(i, n, base, len, off, \
141 (K)) \
142 } \
143 i->count -= n; \
144 } \
145 }
146 #define iterate_and_advance(i, n, base, len, off, I, K) \
147 __iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))
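/*
 * Note on the step expressions: for each contiguous chunk the macros above
 * hand the step a pointer in "base" and a length in "len"; the step must
 * evaluate to the number of bytes it did NOT process (0 on full success).
 * The macro then advances the iterator by what was processed and leaves
 * that total in "n".  iterate_and_advance() forces the kernel-side step K
 * to 0, since plain kernel memory copies cannot fail.  A minimal sketch of
 * a user, modelled on _copy_to_iter() below ("buf" stands for some kernel
 * source buffer):
 *
 *	iterate_and_advance(i, n, base, len, off,
 *		copyout(base, buf + off, len),	// user chunk: returns bytes left
 *		memcpy(base, buf + off, len)	// kernel chunk: cannot fail
 *	)
 */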
148
149 static int copyout(void __user *to, const void *from, size_t n)
150 {
151 if (should_fail_usercopy())
152 return n;
153 if (access_ok(to, n)) {
154 instrument_copy_to_user(to, from, n);
155 n = raw_copy_to_user(to, from, n);
156 }
157 return n;
158 }
159
160 static int copyin(void *to, const void __user *from, size_t n)
161 {
162 if (should_fail_usercopy())
163 return n;
164 if (access_ok(from, n)) {
165 instrument_copy_from_user(to, from, n);
166 n = raw_copy_from_user(to, from, n);
167 }
168 return n;
169 }
170
171 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
172 struct iov_iter *i)
173 {
174 size_t skip, copy, left, wanted;
175 const struct iovec *iov;
176 char __user *buf;
177 void *kaddr, *from;
178
179 if (unlikely(bytes > i->count))
180 bytes = i->count;
181
182 if (unlikely(!bytes))
183 return 0;
184
185 might_fault();
186 wanted = bytes;
187 iov = i->iov;
188 skip = i->iov_offset;
189 buf = iov->iov_base + skip;
190 copy = min(bytes, iov->iov_len - skip);
191
192 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
193 kaddr = kmap_atomic(page);
194 from = kaddr + offset;
195
196 /* first chunk, usually the only one */
197 left = copyout(buf, from, copy);
198 copy -= left;
199 skip += copy;
200 from += copy;
201 bytes -= copy;
202
203 while (unlikely(!left && bytes)) {
204 iov++;
205 buf = iov->iov_base;
206 copy = min(bytes, iov->iov_len);
207 left = copyout(buf, from, copy);
208 copy -= left;
209 skip = copy;
210 from += copy;
211 bytes -= copy;
212 }
213 if (likely(!bytes)) {
214 kunmap_atomic(kaddr);
215 goto done;
216 }
217 offset = from - kaddr;
218 buf += copy;
219 kunmap_atomic(kaddr);
220 copy = min(bytes, iov->iov_len - skip);
221 }
222 /* Too bad - revert to non-atomic kmap */
223
224 kaddr = kmap(page);
225 from = kaddr + offset;
226 left = copyout(buf, from, copy);
227 copy -= left;
228 skip += copy;
229 from += copy;
230 bytes -= copy;
231 while (unlikely(!left && bytes)) {
232 iov++;
233 buf = iov->iov_base;
234 copy = min(bytes, iov->iov_len);
235 left = copyout(buf, from, copy);
236 copy -= left;
237 skip = copy;
238 from += copy;
239 bytes -= copy;
240 }
241 kunmap(page);
242
243 done:
244 if (skip == iov->iov_len) {
245 iov++;
246 skip = 0;
247 }
248 i->count -= wanted - bytes;
249 i->nr_segs -= iov - i->iov;
250 i->iov = iov;
251 i->iov_offset = skip;
252 return wanted - bytes;
253 }
254
255 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
256 struct iov_iter *i)
257 {
258 size_t skip, copy, left, wanted;
259 const struct iovec *iov;
260 char __user *buf;
261 void *kaddr, *to;
262
263 if (unlikely(bytes > i->count))
264 bytes = i->count;
265
266 if (unlikely(!bytes))
267 return 0;
268
269 might_fault();
270 wanted = bytes;
271 iov = i->iov;
272 skip = i->iov_offset;
273 buf = iov->iov_base + skip;
274 copy = min(bytes, iov->iov_len - skip);
275
276 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
277 kaddr = kmap_atomic(page);
278 to = kaddr + offset;
279
280 /* first chunk, usually the only one */
281 left = copyin(to, buf, copy);
282 copy -= left;
283 skip += copy;
284 to += copy;
285 bytes -= copy;
286
287 while (unlikely(!left && bytes)) {
288 iov++;
289 buf = iov->iov_base;
290 copy = min(bytes, iov->iov_len);
291 left = copyin(to, buf, copy);
292 copy -= left;
293 skip = copy;
294 to += copy;
295 bytes -= copy;
296 }
297 if (likely(!bytes)) {
298 kunmap_atomic(kaddr);
299 goto done;
300 }
301 offset = to - kaddr;
302 buf += copy;
303 kunmap_atomic(kaddr);
304 copy = min(bytes, iov->iov_len - skip);
305 }
306 /* Too bad - revert to non-atomic kmap */
307
308 kaddr = kmap(page);
309 to = kaddr + offset;
310 left = copyin(to, buf, copy);
311 copy -= left;
312 skip += copy;
313 to += copy;
314 bytes -= copy;
315 while (unlikely(!left && bytes)) {
316 iov++;
317 buf = iov->iov_base;
318 copy = min(bytes, iov->iov_len);
319 left = copyin(to, buf, copy);
320 copy -= left;
321 skip = copy;
322 to += copy;
323 bytes -= copy;
324 }
325 kunmap(page);
326
327 done:
328 if (skip == iov->iov_len) {
329 iov++;
330 skip = 0;
331 }
332 i->count -= wanted - bytes;
333 i->nr_segs -= iov - i->iov;
334 i->iov = iov;
335 i->iov_offset = skip;
336 return wanted - bytes;
337 }
338
339 #ifdef PIPE_PARANOIA
340 static bool sanity(const struct iov_iter *i)
341 {
342 struct pipe_inode_info *pipe = i->pipe;
343 unsigned int p_head = pipe->head;
344 unsigned int p_tail = pipe->tail;
345 unsigned int p_mask = pipe->ring_size - 1;
346 unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
347 unsigned int i_head = i->head;
348 unsigned int idx;
349
350 if (i->iov_offset) {
351 struct pipe_buffer *p;
352 if (unlikely(p_occupancy == 0))
353 goto Bad; // pipe must be non-empty
354 if (unlikely(i_head != p_head - 1))
355 goto Bad; // must be at the last buffer...
356
357 p = &pipe->bufs[i_head & p_mask];
358 if (unlikely(p->offset + p->len != i->iov_offset))
359 goto Bad; // ... at the end of segment
360 } else {
361 if (i_head != p_head)
362 goto Bad; // must be right after the last buffer
363 }
364 return true;
365 Bad:
366 printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
367 printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
368 p_head, p_tail, pipe->ring_size);
369 for (idx = 0; idx < pipe->ring_size; idx++)
370 printk(KERN_ERR "[%p %p %d %d]\n",
371 pipe->bufs[idx].ops,
372 pipe->bufs[idx].page,
373 pipe->bufs[idx].offset,
374 pipe->bufs[idx].len);
375 WARN_ON(1);
376 return false;
377 }
378 #else
379 #define sanity(i) true
380 #endif
381
382 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
383 struct iov_iter *i)
384 {
385 struct pipe_inode_info *pipe = i->pipe;
386 struct pipe_buffer *buf;
387 unsigned int p_tail = pipe->tail;
388 unsigned int p_mask = pipe->ring_size - 1;
389 unsigned int i_head = i->head;
390 size_t off;
391
392 if (unlikely(bytes > i->count))
393 bytes = i->count;
394
395 if (unlikely(!bytes))
396 return 0;
397
398 if (!sanity(i))
399 return 0;
400
401 off = i->iov_offset;
402 buf = &pipe->bufs[i_head & p_mask];
403 if (off) {
404 if (offset == off && buf->page == page) {
405 /* merge with the last one */
406 buf->len += bytes;
407 i->iov_offset += bytes;
408 goto out;
409 }
410 i_head++;
411 buf = &pipe->bufs[i_head & p_mask];
412 }
413 if (pipe_full(i_head, p_tail, pipe->max_usage))
414 return 0;
415
416 buf->ops = &page_cache_pipe_buf_ops;
417 buf->flags = 0;
418 get_page(page);
419 buf->page = page;
420 buf->offset = offset;
421 buf->len = bytes;
422
423 pipe->head = i_head + 1;
424 i->iov_offset = offset + bytes;
425 i->head = i_head;
426 out:
427 i->count -= bytes;
428 return bytes;
429 }
430
431 /*
432 * fault_in_iov_iter_readable - fault in iov iterator for reading
433 * @i: iterator
434 * @size: maximum length
435 *
436 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
437 * @size. For each iovec, fault in each page that constitutes the iovec.
438 *
439 * Returns the number of bytes not faulted in (like copy_to_user() and
440 * copy_from_user()).
441 *
442 * Always returns 0 for non-userspace iterators.
443 */
444 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
445 {
446 if (iter_is_iovec(i)) {
447 size_t count = min(size, iov_iter_count(i));
448 const struct iovec *p;
449 size_t skip;
450
451 size -= count;
452 for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
453 size_t len = min(count, p->iov_len - skip);
454 size_t ret;
455
456 if (unlikely(!len))
457 continue;
458 ret = fault_in_readable(p->iov_base + skip, len);
459 count -= len - ret;
460 if (ret)
461 break;
462 }
463 return count + size;
464 }
465 return 0;
466 }
467 EXPORT_SYMBOL(fault_in_iov_iter_readable);
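/*
 * Usage sketch (illustrative only, loosely modelled on the generic write
 * path; "page", "offset" and "bytes" are assumed caller state): pre-fault
 * the user source buffers, then copy with page faults disabled and retry
 * on a short copy.
 *
 *	if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes))
 *		return -EFAULT;		// nothing could be faulted in
 *	copied = copy_page_from_iter_atomic(page, offset, bytes, i);
 *	// if copied < bytes, fault in again and retry the remainder
 */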
468
469 /*
470 * fault_in_iov_iter_writeable - fault in iov iterator for writing
471 * @i: iterator
472 * @size: maximum length
473 *
474 * Faults in the iterator using get_user_pages(), i.e., without triggering
475 * hardware page faults. This is primarily useful when we already know that
476 * some or all of the pages in @i aren't in memory.
477 *
478 * Returns the number of bytes not faulted in, like copy_to_user() and
479 * copy_from_user().
480 *
481 * Always returns 0 for non-userspace iterators.
482 */
483 size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
484 {
485 if (iter_is_iovec(i)) {
486 size_t count = min(size, iov_iter_count(i));
487 const struct iovec *p;
488 size_t skip;
489
490 size -= count;
491 for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
492 size_t len = min(count, p->iov_len - skip);
493 size_t ret;
494
495 if (unlikely(!len))
496 continue;
497 ret = fault_in_safe_writeable(p->iov_base + skip, len);
498 count -= len - ret;
499 if (ret)
500 break;
501 }
502 return count + size;
503 }
504 return 0;
505 }
506 EXPORT_SYMBOL(fault_in_iov_iter_writeable);
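/*
 * Usage sketch (illustrative only; "left" is the assumed number of bytes
 * still to transfer): callers that copy *to* user memory with faults
 * disabled (e.g. with i->nofault set so iov_iter_get_pages() passes
 * FOLL_NOFAULT) can, after a failed copy, fault the destination in and
 * retry, giving up only if nothing at all could be faulted in.
 *
 *	if (fault_in_iov_iter_writeable(i, left) != left)
 *		goto retry;
 *	return -EFAULT;
 */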
507
508 void iov_iter_init(struct iov_iter *i, unsigned int direction,
509 const struct iovec *iov, unsigned long nr_segs,
510 size_t count)
511 {
512 WARN_ON(direction & ~(READ | WRITE));
513 *i = (struct iov_iter) {
514 .iter_type = ITER_IOVEC,
515 .nofault = false,
516 .data_source = direction,
517 .iov = iov,
518 .nr_segs = nr_segs,
519 .iov_offset = 0,
520 .count = count
521 };
522 }
523 EXPORT_SYMBOL(iov_iter_init);
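/*
 * Example (illustrative sketch; "ubuf" and "len" are assumed caller
 * arguments): wrap a single user buffer in an ITER_IOVEC destination for a
 * read-style operation, then hand the iterator to the copy helpers below.
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 */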
524
525 static inline bool allocated(struct pipe_buffer *buf)
526 {
527 return buf->ops == &default_pipe_buf_ops;
528 }
529
530 static inline void data_start(const struct iov_iter *i,
531 unsigned int *iter_headp, size_t *offp)
532 {
533 unsigned int p_mask = i->pipe->ring_size - 1;
534 unsigned int iter_head = i->head;
535 size_t off = i->iov_offset;
536
537 if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
538 off == PAGE_SIZE)) {
539 iter_head++;
540 off = 0;
541 }
542 *iter_headp = iter_head;
543 *offp = off;
544 }
545
546 static size_t push_pipe(struct iov_iter *i, size_t size,
547 int *iter_headp, size_t *offp)
548 {
549 struct pipe_inode_info *pipe = i->pipe;
550 unsigned int p_tail = pipe->tail;
551 unsigned int p_mask = pipe->ring_size - 1;
552 unsigned int iter_head;
553 size_t off;
554 ssize_t left;
555
556 if (unlikely(size > i->count))
557 size = i->count;
558 if (unlikely(!size))
559 return 0;
560
561 left = size;
562 data_start(i, &iter_head, &off);
563 *iter_headp = iter_head;
564 *offp = off;
565 if (off) {
566 left -= PAGE_SIZE - off;
567 if (left <= 0) {
568 pipe->bufs[iter_head & p_mask].len += size;
569 return size;
570 }
571 pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
572 iter_head++;
573 }
574 while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
575 struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
576 struct page *page = alloc_page(GFP_USER);
577 if (!page)
578 break;
579
580 buf->ops = &default_pipe_buf_ops;
581 buf->flags = 0;
582 buf->page = page;
583 buf->offset = 0;
584 buf->len = min_t(ssize_t, left, PAGE_SIZE);
585 left -= buf->len;
586 iter_head++;
587 pipe->head = iter_head;
588
589 if (left == 0)
590 return size;
591 }
592 return size - left;
593 }
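/*
 * Note on push_pipe(): it secures up to @size bytes of space in the pipe
 * ring, allocating fresh pages (owned by default_pipe_buf_ops) as needed,
 * and reports via *iter_headp/*offp where the caller should start writing.
 * The return value is the number of bytes actually secured, which may be
 * short if the pipe fills up or a page allocation fails, so callers must
 * cope with a short count.
 */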
594
595 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
596 struct iov_iter *i)
597 {
598 struct pipe_inode_info *pipe = i->pipe;
599 unsigned int p_mask = pipe->ring_size - 1;
600 unsigned int i_head;
601 size_t n, off;
602
603 if (!sanity(i))
604 return 0;
605
606 bytes = n = push_pipe(i, bytes, &i_head, &off);
607 if (unlikely(!n))
608 return 0;
609 do {
610 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
611 memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
612 i->head = i_head;
613 i->iov_offset = off + chunk;
614 n -= chunk;
615 addr += chunk;
616 off = 0;
617 i_head++;
618 } while (n);
619 i->count -= bytes;
620 return bytes;
621 }
622
623 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
624 __wsum sum, size_t off)
625 {
626 __wsum next = csum_partial_copy_nocheck(from, to, len);
627 return csum_block_add(sum, next, off);
628 }
629
630 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
631 struct iov_iter *i, __wsum *sump)
632 {
633 struct pipe_inode_info *pipe = i->pipe;
634 unsigned int p_mask = pipe->ring_size - 1;
635 __wsum sum = *sump;
636 size_t off = 0;
637 unsigned int i_head;
638 size_t r;
639
640 if (!sanity(i))
641 return 0;
642
643 bytes = push_pipe(i, bytes, &i_head, &r);
644 while (bytes) {
645 size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r);
646 char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
647 sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off);
648 kunmap_local(p);
649 i->head = i_head;
650 i->iov_offset = r + chunk;
651 bytes -= chunk;
652 off += chunk;
653 r = 0;
654 i_head++;
655 }
656 *sump = sum;
657 i->count -= off;
658 return off;
659 }
660
661 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
662 {
663 if (unlikely(iov_iter_is_pipe(i)))
664 return copy_pipe_to_iter(addr, bytes, i);
665 if (iter_is_iovec(i))
666 might_fault();
667 iterate_and_advance(i, bytes, base, len, off,
668 copyout(base, addr + off, len),
669 memcpy(base, addr + off, len)
670 )
671
672 return bytes;
673 }
674 EXPORT_SYMBOL(_copy_to_iter);
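/*
 * Usage sketch (illustrative only; "kbuf", "len" and "iter" are assumed
 * caller state): most callers go through the checked copy_to_iter()
 * wrapper from <linux/uio.h> rather than calling _copy_to_iter() directly;
 * a short return means part of a userspace destination faulted.
 *
 *	if (copy_to_iter(kbuf, len, iter) != len)
 *		return -EFAULT;
 */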
675
676 #ifdef CONFIG_ARCH_HAS_COPY_MC
677 static int copyout_mc(void __user *to, const void *from, size_t n)
678 {
679 if (access_ok(to, n)) {
680 instrument_copy_to_user(to, from, n);
681 n = copy_mc_to_user((__force void *) to, from, n);
682 }
683 return n;
684 }
685
686 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
687 struct iov_iter *i)
688 {
689 struct pipe_inode_info *pipe = i->pipe;
690 unsigned int p_mask = pipe->ring_size - 1;
691 unsigned int i_head;
692 unsigned int valid = pipe->head;
693 size_t n, off, xfer = 0;
694
695 if (!sanity(i))
696 return 0;
697
698 n = push_pipe(i, bytes, &i_head, &off);
699 while (n) {
700 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
701 char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
702 unsigned long rem;
703 rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
704 chunk -= rem;
705 kunmap_local(p);
706 if (chunk) {
707 i->head = i_head;
708 i->iov_offset = off + chunk;
709 xfer += chunk;
710 valid = i_head + 1;
711 }
712 if (rem) {
713 pipe->bufs[i_head & p_mask].len -= rem;
714 pipe_discard_from(pipe, valid);
715 break;
716 }
717 n -= chunk;
718 off = 0;
719 i_head++;
720 }
721 i->count -= xfer;
722 return xfer;
723 }
724
725 /**
726 * _copy_mc_to_iter - copy to iter with source memory error exception handling
727 * @addr: source kernel address
728 * @bytes: total transfer length
729 * @i: destination iterator
730 *
731 * The pmem driver deploys this for the dax operation
732 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
733 * block-layer). Upon #MC, read(2) aborts and returns EIO or the bytes
734 * successfully copied.
735 *
736 * The main differences between this and typical _copy_to_iter() are:
737 *
738 * * Typical tail/residue handling after a fault retries the copy
739 * byte-by-byte until the fault happens again. Re-triggering machine
740 * checks is potentially fatal so the implementation uses source
741 * alignment and poison alignment assumptions to avoid re-triggering
742 * hardware exceptions.
743 *
744 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
745 * Compare to copy_to_iter() where only ITER_IOVEC attempts might return
746 * a short copy.
747 *
748 * Return: number of bytes copied (may be %0)
749 */
750 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
751 {
752 if (unlikely(iov_iter_is_pipe(i)))
753 return copy_mc_pipe_to_iter(addr, bytes, i);
754 if (iter_is_iovec(i))
755 might_fault();
756 __iterate_and_advance(i, bytes, base, len, off,
757 copyout_mc(base, addr + off, len),
758 copy_mc_to_kernel(base, addr + off, len)
759 )
760
761 return bytes;
762 }
763 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
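/*
 * Usage sketch (illustrative only; "src" and "len" are assumed caller
 * state): per the comment above, a dax-style read path treats a short
 * return as consumed poison and fails the remainder of the request rather
 * than retrying it byte by byte.
 *
 *	copied = _copy_mc_to_iter(src, len, iter);	// may be short on #MC
 *	return copied ? copied : -EIO;
 */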
764 #endif /* CONFIG_ARCH_HAS_COPY_MC */
765
766 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
767 {
768 if (unlikely(iov_iter_is_pipe(i))) {
769 WARN_ON(1);
770 return 0;
771 }
772 if (iter_is_iovec(i))
773 might_fault();
774 iterate_and_advance(i, bytes, base, len, off,
775 copyin(addr + off, base, len),
776 memcpy(addr + off, base, len)
777 )
778
779 return bytes;
780 }
781 EXPORT_SYMBOL(_copy_from_iter);
782
783 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
784 {
785 if (unlikely(iov_iter_is_pipe(i))) {
786 WARN_ON(1);
787 return 0;
788 }
789 iterate_and_advance(i, bytes, base, len, off,
790 __copy_from_user_inatomic_nocache(addr + off, base, len),
791 memcpy(addr + off, base, len)
792 )
793
794 return bytes;
795 }
796 EXPORT_SYMBOL(_copy_from_iter_nocache);
797
798 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
799 /**
800 * _copy_from_iter_flushcache - write destination through cpu cache
801 * @addr: destination kernel address
802 * @bytes: total transfer length
803 * @i: source iterator
804 *
805 * The pmem driver arranges for filesystem-dax to use this facility via
806 * dax_copy_from_iter() for ensuring that writes to persistent memory
807 * are flushed through the CPU cache. It is differentiated from
808 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
809 * all iterator types. _copy_from_iter_nocache() only attempts to
810 * bypass the cache for the ITER_IOVEC case, and on some archs may use
811 * instructions that strand dirty-data in the cache.
812 *
813 * Return: number of bytes copied (may be %0)
814 */
815 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
816 {
817 if (unlikely(iov_iter_is_pipe(i))) {
818 WARN_ON(1);
819 return 0;
820 }
821 iterate_and_advance(i, bytes, base, len, off,
822 __copy_from_user_flushcache(addr + off, base, len),
823 memcpy_flushcache(addr + off, base, len)
824 )
825
826 return bytes;
827 }
828 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
829 #endif
830
831 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
832 {
833 struct page *head;
834 size_t v = n + offset;
835
836 /*
837 * The general case needs to access the page order in order
838 * to compute the page size.
839 * However, we mostly deal with order-0 pages and thus can
840 * avoid a possible cache line miss for requests that fit all
841 * page orders.
842 */
843 if (n <= v && v <= PAGE_SIZE)
844 return true;
845
846 head = compound_head(page);
847 v += (page - head) << PAGE_SHIFT;
848
849 if (likely(n <= v && v <= (page_size(head))))
850 return true;
851 WARN_ON(1);
852 return false;
853 }
854
855 static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
856 struct iov_iter *i)
857 {
858 if (likely(iter_is_iovec(i)))
859 return copy_page_to_iter_iovec(page, offset, bytes, i);
860 if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
861 void *kaddr = kmap_local_page(page);
862 size_t wanted = _copy_to_iter(kaddr + offset, bytes, i);
863 kunmap_local(kaddr);
864 return wanted;
865 }
866 if (iov_iter_is_pipe(i))
867 return copy_page_to_iter_pipe(page, offset, bytes, i);
868 if (unlikely(iov_iter_is_discard(i))) {
869 if (unlikely(i->count < bytes))
870 bytes = i->count;
871 i->count -= bytes;
872 return bytes;
873 }
874 WARN_ON(1);
875 return 0;
876 }
877
878 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
879 struct iov_iter *i)
880 {
881 size_t res = 0;
882 if (unlikely(!page_copy_sane(page, offset, bytes)))
883 return 0;
884 page += offset / PAGE_SIZE; // first subpage
885 offset %= PAGE_SIZE;
886 while (1) {
887 size_t n = __copy_page_to_iter(page, offset,
888 min(bytes, (size_t)PAGE_SIZE - offset), i);
889 res += n;
890 bytes -= n;
891 if (!bytes || !n)
892 break;
893 offset += n;
894 if (offset == PAGE_SIZE) {
895 page++;
896 offset = 0;
897 }
898 }
899 return res;
900 }
901 EXPORT_SYMBOL(copy_page_to_iter);
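/*
 * Usage sketch (illustrative only; "pos", "chunk" and "total_copied" are
 * assumed caller state): a typical read path copies part of a (possibly
 * compound) page into whatever the iterator describes and stops on a
 * short copy.
 *
 *	copied = copy_page_to_iter(page, offset_in_page(pos), chunk, iter);
 *	if (copied < chunk)
 *		return total_copied;	// destination faulted part-way
 */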
902
903 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
904 struct iov_iter *i)
905 {
906 if (unlikely(!page_copy_sane(page, offset, bytes)))
907 return 0;
908 if (likely(iter_is_iovec(i)))
909 return copy_page_from_iter_iovec(page, offset, bytes, i);
910 if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
911 void *kaddr = kmap_local_page(page);
912 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
913 kunmap_local(kaddr);
914 return wanted;
915 }
916 WARN_ON(1);
917 return 0;
918 }
919 EXPORT_SYMBOL(copy_page_from_iter);
920
921 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
922 {
923 struct pipe_inode_info *pipe = i->pipe;
924 unsigned int p_mask = pipe->ring_size - 1;
925 unsigned int i_head;
926 size_t n, off;
927
928 if (!sanity(i))
929 return 0;
930
931 bytes = n = push_pipe(i, bytes, &i_head, &off);
932 if (unlikely(!n))
933 return 0;
934
935 do {
936 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
937 char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
938 memset(p + off, 0, chunk);
939 kunmap_local(p);
940 i->head = i_head;
941 i->iov_offset = off + chunk;
942 n -= chunk;
943 off = 0;
944 i_head++;
945 } while (n);
946 i->count -= bytes;
947 return bytes;
948 }
949
950 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
951 {
952 if (unlikely(iov_iter_is_pipe(i)))
953 return pipe_zero(bytes, i);
954 iterate_and_advance(i, bytes, base, len, count,
955 clear_user(base, len),
956 memset(base, 0, len)
957 )
958
959 return bytes;
960 }
961 EXPORT_SYMBOL(iov_iter_zero);
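/*
 * Usage sketch (illustrative only; "count" and "copied" are assumed caller
 * state): pad the remainder of a request with zeroes, e.g. the part of a
 * read that extends past the data actually available.
 *
 *	copied += iov_iter_zero(count - copied, iter);
 */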
962
963 size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
964 struct iov_iter *i)
965 {
966 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
967 if (unlikely(!page_copy_sane(page, offset, bytes))) {
968 kunmap_atomic(kaddr);
969 return 0;
970 }
971 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
972 kunmap_atomic(kaddr);
973 WARN_ON(1);
974 return 0;
975 }
976 iterate_and_advance(i, bytes, base, len, off,
977 copyin(p + off, base, len),
978 memcpy(p + off, base, len)
979 )
980 kunmap_atomic(kaddr);
981 return bytes;
982 }
983 EXPORT_SYMBOL(copy_page_from_iter_atomic);
984
985 static inline void pipe_truncate(struct iov_iter *i)
986 {
987 struct pipe_inode_info *pipe = i->pipe;
988 unsigned int p_tail = pipe->tail;
989 unsigned int p_head = pipe->head;
990 unsigned int p_mask = pipe->ring_size - 1;
991
992 if (!pipe_empty(p_head, p_tail)) {
993 struct pipe_buffer *buf;
994 unsigned int i_head = i->head;
995 size_t off = i->iov_offset;
996
997 if (off) {
998 buf = &pipe->bufs[i_head & p_mask];
999 buf->len = off - buf->offset;
1000 i_head++;
1001 }
1002 while (p_head != i_head) {
1003 p_head--;
1004 pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1005 }
1006
1007 pipe->head = p_head;
1008 }
1009 }
1010
1011 static void pipe_advance(struct iov_iter *i, size_t size)
1012 {
1013 struct pipe_inode_info *pipe = i->pipe;
1014 if (size) {
1015 struct pipe_buffer *buf;
1016 unsigned int p_mask = pipe->ring_size - 1;
1017 unsigned int i_head = i->head;
1018 size_t off = i->iov_offset, left = size;
1019
1020 if (off) /* make it relative to the beginning of buffer */
1021 left += off - pipe->bufs[i_head & p_mask].offset;
1022 while (1) {
1023 buf = &pipe->bufs[i_head & p_mask];
1024 if (left <= buf->len)
1025 break;
1026 left -= buf->len;
1027 i_head++;
1028 }
1029 i->head = i_head;
1030 i->iov_offset = buf->offset + left;
1031 }
1032 i->count -= size;
1033 /* ... and discard everything past that point */
1034 pipe_truncate(i);
1035 }
1036
1037 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1038 {
1039 struct bvec_iter bi;
1040
1041 bi.bi_size = i->count;
1042 bi.bi_bvec_done = i->iov_offset;
1043 bi.bi_idx = 0;
1044 bvec_iter_advance(i->bvec, &bi, size);
1045
1046 i->bvec += bi.bi_idx;
1047 i->nr_segs -= bi.bi_idx;
1048 i->count = bi.bi_size;
1049 i->iov_offset = bi.bi_bvec_done;
1050 }
1051
1052 static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
1053 {
1054 const struct iovec *iov, *end;
1055
1056 if (!i->count)
1057 return;
1058 i->count -= size;
1059
1060 size += i->iov_offset; // from beginning of current segment
1061 for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
1062 if (likely(size < iov->iov_len))
1063 break;
1064 size -= iov->iov_len;
1065 }
1066 i->iov_offset = size;
1067 i->nr_segs -= iov - i->iov;
1068 i->iov = iov;
1069 }
1070
1071 void iov_iter_advance(struct iov_iter *i, size_t size)
1072 {
1073 if (unlikely(i->count < size))
1074 size = i->count;
1075 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
1076 /* iovec and kvec have identical layouts */
1077 iov_iter_iovec_advance(i, size);
1078 } else if (iov_iter_is_bvec(i)) {
1079 iov_iter_bvec_advance(i, size);
1080 } else if (iov_iter_is_pipe(i)) {
1081 pipe_advance(i, size);
1082 } else if (unlikely(iov_iter_is_xarray(i))) {
1083 i->iov_offset += size;
1084 i->count -= size;
1085 } else if (iov_iter_is_discard(i)) {
1086 i->count -= size;
1087 }
1088 }
1089 EXPORT_SYMBOL(iov_iter_advance);
1090
1091 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1092 {
1093 if (!unroll)
1094 return;
1095 if (WARN_ON(unroll > MAX_RW_COUNT))
1096 return;
1097 i->count += unroll;
1098 if (unlikely(iov_iter_is_pipe(i))) {
1099 struct pipe_inode_info *pipe = i->pipe;
1100 unsigned int p_mask = pipe->ring_size - 1;
1101 unsigned int i_head = i->head;
1102 size_t off = i->iov_offset;
1103 while (1) {
1104 struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1105 size_t n = off - b->offset;
1106 if (unroll < n) {
1107 off -= unroll;
1108 break;
1109 }
1110 unroll -= n;
1111 if (!unroll && i_head == i->start_head) {
1112 off = 0;
1113 break;
1114 }
1115 i_head--;
1116 b = &pipe->bufs[i_head & p_mask];
1117 off = b->offset + b->len;
1118 }
1119 i->iov_offset = off;
1120 i->head = i_head;
1121 pipe_truncate(i);
1122 return;
1123 }
1124 if (unlikely(iov_iter_is_discard(i)))
1125 return;
1126 if (unroll <= i->iov_offset) {
1127 i->iov_offset -= unroll;
1128 return;
1129 }
1130 unroll -= i->iov_offset;
1131 if (iov_iter_is_xarray(i)) {
1132 BUG(); /* We should never go beyond the start of the specified
1133 * range since we might then be straying into pages that
1134 * aren't pinned.
1135 */
1136 } else if (iov_iter_is_bvec(i)) {
1137 const struct bio_vec *bvec = i->bvec;
1138 while (1) {
1139 size_t n = (--bvec)->bv_len;
1140 i->nr_segs++;
1141 if (unroll <= n) {
1142 i->bvec = bvec;
1143 i->iov_offset = n - unroll;
1144 return;
1145 }
1146 unroll -= n;
1147 }
1148 } else { /* same logics for iovec and kvec */
1149 const struct iovec *iov = i->iov;
1150 while (1) {
1151 size_t n = (--iov)->iov_len;
1152 i->nr_segs++;
1153 if (unroll <= n) {
1154 i->iov = iov;
1155 i->iov_offset = n - unroll;
1156 return;
1157 }
1158 unroll -= n;
1159 }
1160 }
1161 }
1162 EXPORT_SYMBOL(iov_iter_revert);
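/*
 * Usage sketch (illustrative only; "do_io" stands in for any operation
 * that may partially consume the iterator): measure how far the operation
 * advanced and roll that back on failure.
 *
 *	size_t before = iov_iter_count(iter);
 *	ret = do_io(iter);
 *	if (ret < 0)
 *		iov_iter_revert(iter, before - iov_iter_count(iter));
 */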
1163
1164 /*
1165 * Return the count of just the current iov_iter segment.
1166 */
1167 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1168 {
1169 if (i->nr_segs > 1) {
1170 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1171 return min(i->count, i->iov->iov_len - i->iov_offset);
1172 if (iov_iter_is_bvec(i))
1173 return min(i->count, i->bvec->bv_len - i->iov_offset);
1174 }
1175 return i->count;
1176 }
1177 EXPORT_SYMBOL(iov_iter_single_seg_count);
1178
1179 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1180 const struct kvec *kvec, unsigned long nr_segs,
1181 size_t count)
1182 {
1183 WARN_ON(direction & ~(READ | WRITE));
1184 *i = (struct iov_iter){
1185 .iter_type = ITER_KVEC,
1186 .data_source = direction,
1187 .kvec = kvec,
1188 .nr_segs = nr_segs,
1189 .iov_offset = 0,
1190 .count = count
1191 };
1192 }
1193 EXPORT_SYMBOL(iov_iter_kvec);
1194
1195 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1196 const struct bio_vec *bvec, unsigned long nr_segs,
1197 size_t count)
1198 {
1199 WARN_ON(direction & ~(READ | WRITE));
1200 *i = (struct iov_iter){
1201 .iter_type = ITER_BVEC,
1202 .data_source = direction,
1203 .bvec = bvec,
1204 .nr_segs = nr_segs,
1205 .iov_offset = 0,
1206 .count = count
1207 };
1208 }
1209 EXPORT_SYMBOL(iov_iter_bvec);
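/*
 * Example (illustrative sketch; the caller is assumed to hold a reference
 * on "page"): describe a single kernel page as an ITER_BVEC data source,
 * e.g. for sending the page contents out without touching userspace.
 *
 *	struct bio_vec bv = {
 *		.bv_page	= page,
 *		.bv_len		= PAGE_SIZE,
 *		.bv_offset	= 0,
 *	};
 *	struct iov_iter iter;
 *
 *	iov_iter_bvec(&iter, WRITE, &bv, 1, PAGE_SIZE);
 */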
1210
1211 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1212 struct pipe_inode_info *pipe,
1213 size_t count)
1214 {
1215 BUG_ON(direction != READ);
1216 WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1217 *i = (struct iov_iter){
1218 .iter_type = ITER_PIPE,
1219 .data_source = false,
1220 .pipe = pipe,
1221 .head = pipe->head,
1222 .start_head = pipe->head,
1223 .iov_offset = 0,
1224 .count = count
1225 };
1226 }
1227 EXPORT_SYMBOL(iov_iter_pipe);
1228
1229 /**
1230 * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1231 * @i: The iterator to initialise.
1232 * @direction: The direction of the transfer.
1233 * @xarray: The xarray to access.
1234 * @start: The start file position.
1235 * @count: The size of the I/O buffer in bytes.
1236 *
1237 * Set up an I/O iterator to either draw data out of the pages attached to an
1238 * inode or to inject data into those pages. The pages *must* be prevented
1239 * from evaporation, either by taking a ref on them or locking them by the
1240 * caller.
1241 */
1242 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1243 struct xarray *xarray, loff_t start, size_t count)
1244 {
1245 BUG_ON(direction & ~1);
1246 *i = (struct iov_iter) {
1247 .iter_type = ITER_XARRAY,
1248 .data_source = direction,
1249 .xarray = xarray,
1250 .xarray_start = start,
1251 .count = count,
1252 .iov_offset = 0
1253 };
1254 }
1255 EXPORT_SYMBOL(iov_iter_xarray);
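/*
 * Usage sketch (illustrative only; "mapping", "pos" and "count" are
 * assumed caller state): describe a range of an inode's page cache, with
 * the pages already referenced or locked as the comment above requires.
 *
 *	iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
 */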
1256
1257 /**
1258 * iov_iter_discard - Initialise an I/O iterator that discards data
1259 * @i: The iterator to initialise.
1260 * @direction: The direction of the transfer.
1261 * @count: The size of the I/O buffer in bytes.
1262 *
1263 * Set up an I/O iterator that just discards everything that's written to it.
1264 * It's only available as a READ iterator.
1265 */
1266 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1267 {
1268 BUG_ON(direction != READ);
1269 *i = (struct iov_iter){
1270 .iter_type = ITER_DISCARD,
1271 .data_source = false,
1272 .count = count,
1273 .iov_offset = 0
1274 };
1275 }
1276 EXPORT_SYMBOL(iov_iter_discard);
1277
1278 static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
1279 {
1280 unsigned long res = 0;
1281 size_t size = i->count;
1282 size_t skip = i->iov_offset;
1283 unsigned k;
1284
1285 for (k = 0; k < i->nr_segs; k++, skip = 0) {
1286 size_t len = i->iov[k].iov_len - skip;
1287 if (len) {
1288 res |= (unsigned long)i->iov[k].iov_base + skip;
1289 if (len > size)
1290 len = size;
1291 res |= len;
1292 size -= len;
1293 if (!size)
1294 break;
1295 }
1296 }
1297 return res;
1298 }
1299
1300 static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
1301 {
1302 unsigned res = 0;
1303 size_t size = i->count;
1304 unsigned skip = i->iov_offset;
1305 unsigned k;
1306
1307 for (k = 0; k < i->nr_segs; k++, skip = 0) {
1308 size_t len = i->bvec[k].bv_len - skip;
1309 res |= (unsigned long)i->bvec[k].bv_offset + skip;
1310 if (len > size)
1311 len = size;
1312 res |= len;
1313 size -= len;
1314 if (!size)
1315 break;
1316 }
1317 return res;
1318 }
1319
1320 unsigned long iov_iter_alignment(const struct iov_iter *i)
1321 {
1322 /* iovec and kvec have identical layouts */
1323 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1324 return iov_iter_alignment_iovec(i);
1325
1326 if (iov_iter_is_bvec(i))
1327 return iov_iter_alignment_bvec(i);
1328
1329 if (iov_iter_is_pipe(i)) {
1330 unsigned int p_mask = i->pipe->ring_size - 1;
1331 size_t size = i->count;
1332
1333 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1334 return size | i->iov_offset;
1335 return size;
1336 }
1337
1338 if (iov_iter_is_xarray(i))
1339 return (i->xarray_start + i->iov_offset) | i->count;
1340
1341 return 0;
1342 }
1343 EXPORT_SYMBOL(iov_iter_alignment);
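/*
 * Usage sketch (illustrative only; "blocksize" is an assumed power of two
 * supplied by the caller): direct I/O paths typically reject iterators
 * whose addresses or lengths are not block aligned.
 *
 *	if (iov_iter_alignment(iter) & (blocksize - 1))
 *		return -EINVAL;
 */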
1344
1345 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1346 {
1347 unsigned long res = 0;
1348 unsigned long v = 0;
1349 size_t size = i->count;
1350 unsigned k;
1351
1352 if (WARN_ON(!iter_is_iovec(i)))
1353 return ~0U;
1354
1355 for (k = 0; k < i->nr_segs; k++) {
1356 if (i->iov[k].iov_len) {
1357 unsigned long base = (unsigned long)i->iov[k].iov_base;
1358 if (v) // if not the first one
1359 res |= base | v; // this start | previous end
1360 v = base + i->iov[k].iov_len;
1361 if (size <= i->iov[k].iov_len)
1362 break;
1363 size -= i->iov[k].iov_len;
1364 }
1365 }
1366 return res;
1367 }
1368 EXPORT_SYMBOL(iov_iter_gap_alignment);
1369
1370 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1371 size_t maxsize,
1372 struct page **pages,
1373 int iter_head,
1374 size_t *start)
1375 {
1376 struct pipe_inode_info *pipe = i->pipe;
1377 unsigned int p_mask = pipe->ring_size - 1;
1378 ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1379 if (!n)
1380 return -EFAULT;
1381
1382 maxsize = n;
1383 n += *start;
1384 while (n > 0) {
1385 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1386 iter_head++;
1387 n -= PAGE_SIZE;
1388 }
1389
1390 return maxsize;
1391 }
1392
1393 static ssize_t pipe_get_pages(struct iov_iter *i,
1394 struct page **pages, size_t maxsize, unsigned maxpages,
1395 size_t *start)
1396 {
1397 unsigned int iter_head, npages;
1398 size_t capacity;
1399
1400 if (!sanity(i))
1401 return -EFAULT;
1402
1403 data_start(i, &iter_head, start);
1404 /* Amount of free space: some of this one + all after this one */
1405 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1406 capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1407
1408 return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1409 }
1410
1411 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1412 pgoff_t index, unsigned int nr_pages)
1413 {
1414 XA_STATE(xas, xa, index);
1415 struct page *page;
1416 unsigned int ret = 0;
1417
1418 rcu_read_lock();
1419 for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1420 if (xas_retry(&xas, page))
1421 continue;
1422
1423 /* Has the page moved or been split? */
1424 if (unlikely(page != xas_reload(&xas))) {
1425 xas_reset(&xas);
1426 continue;
1427 }
1428
1429 pages[ret] = find_subpage(page, xas.xa_index);
1430 get_page(pages[ret]);
1431 if (++ret == nr_pages)
1432 break;
1433 }
1434 rcu_read_unlock();
1435 return ret;
1436 }
1437
1438 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1439 struct page **pages, size_t maxsize,
1440 unsigned maxpages, size_t *_start_offset)
1441 {
1442 unsigned nr, offset;
1443 pgoff_t index, count;
1444 size_t size = maxsize;
1445 loff_t pos;
1446
1447 if (!size || !maxpages)
1448 return 0;
1449
1450 pos = i->xarray_start + i->iov_offset;
1451 index = pos >> PAGE_SHIFT;
1452 offset = pos & ~PAGE_MASK;
1453 *_start_offset = offset;
1454
1455 count = 1;
1456 if (size > PAGE_SIZE - offset) {
1457 size -= PAGE_SIZE - offset;
1458 count += size >> PAGE_SHIFT;
1459 size &= ~PAGE_MASK;
1460 if (size)
1461 count++;
1462 }
1463
1464 if (count > maxpages)
1465 count = maxpages;
1466
1467 nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1468 if (nr == 0)
1469 return 0;
1470
1471 return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
1472 }
1473
1474 /* must be done on a non-empty ITER_IOVEC iterator */
1475 static unsigned long first_iovec_segment(const struct iov_iter *i,
1476 size_t *size, size_t *start,
1477 size_t maxsize, unsigned maxpages)
1478 {
1479 size_t skip;
1480 long k;
1481
1482 for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
1483 unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
1484 size_t len = i->iov[k].iov_len - skip;
1485
1486 if (unlikely(!len))
1487 continue;
1488 if (len > maxsize)
1489 len = maxsize;
1490 len += (*start = addr % PAGE_SIZE);
1491 if (len > maxpages * PAGE_SIZE)
1492 len = maxpages * PAGE_SIZE;
1493 *size = len;
1494 return addr & PAGE_MASK;
1495 }
1496 BUG(); // if it had been empty, we wouldn't get called
1497 }
1498
1499 /* must be done on a non-empty ITER_BVEC iterator */
1500 static struct page *first_bvec_segment(const struct iov_iter *i,
1501 size_t *size, size_t *start,
1502 size_t maxsize, unsigned maxpages)
1503 {
1504 struct page *page;
1505 size_t skip = i->iov_offset, len;
1506
1507 len = i->bvec->bv_len - skip;
1508 if (len > maxsize)
1509 len = maxsize;
1510 skip += i->bvec->bv_offset;
1511 page = i->bvec->bv_page + skip / PAGE_SIZE;
1512 len += (*start = skip % PAGE_SIZE);
1513 if (len > maxpages * PAGE_SIZE)
1514 len = maxpages * PAGE_SIZE;
1515 *size = len;
1516 return page;
1517 }
1518
1519 ssize_t iov_iter_get_pages(struct iov_iter *i,
1520 struct page **pages, size_t maxsize, unsigned maxpages,
1521 size_t *start)
1522 {
1523 size_t len;
1524 int n, res;
1525
1526 if (maxsize > i->count)
1527 maxsize = i->count;
1528 if (!maxsize)
1529 return 0;
1530
1531 if (likely(iter_is_iovec(i))) {
1532 unsigned int gup_flags = 0;
1533 unsigned long addr;
1534
1535 if (iov_iter_rw(i) != WRITE)
1536 gup_flags |= FOLL_WRITE;
1537 if (i->nofault)
1538 gup_flags |= FOLL_NOFAULT;
1539
1540 addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
1541 n = DIV_ROUND_UP(len, PAGE_SIZE);
1542 res = get_user_pages_fast(addr, n, gup_flags, pages);
1543 if (unlikely(res <= 0))
1544 return res;
1545 return (res == n ? len : res * PAGE_SIZE) - *start;
1546 }
1547 if (iov_iter_is_bvec(i)) {
1548 struct page *page;
1549
1550 page = first_bvec_segment(i, &len, start, maxsize, maxpages);
1551 n = DIV_ROUND_UP(len, PAGE_SIZE);
1552 while (n--)
1553 get_page(*pages++ = page++);
1554 return len - *start;
1555 }
1556 if (iov_iter_is_pipe(i))
1557 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1558 if (iov_iter_is_xarray(i))
1559 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1560 return -EFAULT;
1561 }
1562 EXPORT_SYMBOL(iov_iter_get_pages);
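/*
 * Usage sketch (illustrative only; "bytes" and "k" are assumed caller
 * state): grab references to the pages backing the front of the iterator,
 * use them, then drop the references.  Note that iov_iter_get_pages()
 * does not advance the iterator; the caller does that once it knows how
 * much it actually consumed.
 *
 *	struct page *pages[16];
 *	size_t off;
 *	ssize_t n = iov_iter_get_pages(iter, pages, bytes, ARRAY_SIZE(pages), &off);
 *
 *	if (n > 0) {
 *		// pages[] covers n bytes starting at offset "off" in pages[0]
 *		// ... do the I/O on those pages here ...
 *		for (k = 0; k < DIV_ROUND_UP(off + n, PAGE_SIZE); k++)
 *			put_page(pages[k]);
 *		iov_iter_advance(iter, n);
 *	}
 */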
1563
1564 static struct page **get_pages_array(size_t n)
1565 {
1566 return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1567 }
1568
1569 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1570 struct page ***pages, size_t maxsize,
1571 size_t *start)
1572 {
1573 struct page **p;
1574 unsigned int iter_head, npages;
1575 ssize_t n;
1576
1577 if (!sanity(i))
1578 return -EFAULT;
1579
1580 data_start(i, &iter_head, start);
1581 /* Amount of free space: some of this one + all after this one */
1582 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1583 n = npages * PAGE_SIZE - *start;
1584 if (maxsize > n)
1585 maxsize = n;
1586 else
1587 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1588 p = get_pages_array(npages);
1589 if (!p)
1590 return -ENOMEM;
1591 n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1592 if (n > 0)
1593 *pages = p;
1594 else
1595 kvfree(p);
1596 return n;
1597 }
1598
1599 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1600 struct page ***pages, size_t maxsize,
1601 size_t *_start_offset)
1602 {
1603 struct page **p;
1604 unsigned nr, offset;
1605 pgoff_t index, count;
1606 size_t size = maxsize;
1607 loff_t pos;
1608
1609 if (!size)
1610 return 0;
1611
1612 pos = i->xarray_start + i->iov_offset;
1613 index = pos >> PAGE_SHIFT;
1614 offset = pos & ~PAGE_MASK;
1615 *_start_offset = offset;
1616
1617 count = 1;
1618 if (size > PAGE_SIZE - offset) {
1619 size -= PAGE_SIZE - offset;
1620 count += size >> PAGE_SHIFT;
1621 size &= ~PAGE_MASK;
1622 if (size)
1623 count++;
1624 }
1625
1626 p = get_pages_array(count);
1627 if (!p)
1628 return -ENOMEM;
1629 *pages = p;
1630
1631 nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1632 if (nr == 0)
1633 return 0;
1634
1635 return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
1636 }
1637
1638 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1639 struct page ***pages, size_t maxsize,
1640 size_t *start)
1641 {
1642 struct page **p;
1643 size_t len;
1644 int n, res;
1645
1646 if (maxsize > i->count)
1647 maxsize = i->count;
1648 if (!maxsize)
1649 return 0;
1650
1651 if (likely(iter_is_iovec(i))) {
1652 unsigned int gup_flags = 0;
1653 unsigned long addr;
1654
1655 if (iov_iter_rw(i) != WRITE)
1656 gup_flags |= FOLL_WRITE;
1657 if (i->nofault)
1658 gup_flags |= FOLL_NOFAULT;
1659
1660 addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
1661 n = DIV_ROUND_UP(len, PAGE_SIZE);
1662 p = get_pages_array(n);
1663 if (!p)
1664 return -ENOMEM;
1665 res = get_user_pages_fast(addr, n, gup_flags, p);
1666 if (unlikely(res <= 0)) {
1667 kvfree(p);
1668 *pages = NULL;
1669 return res;
1670 }
1671 *pages = p;
1672 return (res == n ? len : res * PAGE_SIZE) - *start;
1673 }
1674 if (iov_iter_is_bvec(i)) {
1675 struct page *page;
1676
1677 page = first_bvec_segment(i, &len, start, maxsize, ~0U);
1678 n = DIV_ROUND_UP(len, PAGE_SIZE);
1679 *pages = p = get_pages_array(n);
1680 if (!p)
1681 return -ENOMEM;
1682 while (n--)
1683 get_page(*p++ = page++);
1684 return len - *start;
1685 }
1686 if (iov_iter_is_pipe(i))
1687 return pipe_get_pages_alloc(i, pages, maxsize, start);
1688 if (iov_iter_is_xarray(i))
1689 return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1690 return -EFAULT;
1691 }
1692 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1693
1694 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1695 struct iov_iter *i)
1696 {
1697 __wsum sum, next;
1698 sum = *csum;
1699 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1700 WARN_ON(1);
1701 return 0;
1702 }
1703 iterate_and_advance(i, bytes, base, len, off, ({
1704 next = csum_and_copy_from_user(base, addr + off, len);
1705 sum = csum_block_add(sum, next, off);
1706 next ? 0 : len;
1707 }), ({
1708 sum = csum_and_memcpy(addr + off, base, len, sum, off);
1709 })
1710 )
1711 *csum = sum;
1712 return bytes;
1713 }
1714 EXPORT_SYMBOL(csum_and_copy_from_iter);
1715
1716 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1717 struct iov_iter *i)
1718 {
1719 struct csum_state *csstate = _csstate;
1720 __wsum sum, next;
1721
1722 if (unlikely(iov_iter_is_discard(i))) {
1723 WARN_ON(1); /* for now */
1724 return 0;
1725 }
1726
1727 sum = csum_shift(csstate->csum, csstate->off);
1728 if (unlikely(iov_iter_is_pipe(i)))
1729 bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum);
1730 else iterate_and_advance(i, bytes, base, len, off, ({
1731 next = csum_and_copy_to_user(addr + off, base, len);
1732 sum = csum_block_add(sum, next, off);
1733 next ? 0 : len;
1734 }), ({
1735 sum = csum_and_memcpy(base, addr + off, len, sum, off);
1736 })
1737 )
1738 csstate->csum = csum_shift(sum, csstate->off);
1739 csstate->off += bytes;
1740 return bytes;
1741 }
1742 EXPORT_SYMBOL(csum_and_copy_to_iter);
1743
1744 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1745 struct iov_iter *i)
1746 {
1747 #ifdef CONFIG_CRYPTO_HASH
1748 struct ahash_request *hash = hashp;
1749 struct scatterlist sg;
1750 size_t copied;
1751
1752 copied = copy_to_iter(addr, bytes, i);
1753 sg_init_one(&sg, addr, copied);
1754 ahash_request_set_crypt(hash, &sg, NULL, copied);
1755 crypto_ahash_update(hash);
1756 return copied;
1757 #else
1758 return 0;
1759 #endif
1760 }
1761 EXPORT_SYMBOL(hash_and_copy_to_iter);
1762
1763 static int iov_npages(const struct iov_iter *i, int maxpages)
1764 {
1765 size_t skip = i->iov_offset, size = i->count;
1766 const struct iovec *p;
1767 int npages = 0;
1768
1769 for (p = i->iov; size; skip = 0, p++) {
1770 unsigned offs = offset_in_page(p->iov_base + skip);
1771 size_t len = min(p->iov_len - skip, size);
1772
1773 if (len) {
1774 size -= len;
1775 npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1776 if (unlikely(npages > maxpages))
1777 return maxpages;
1778 }
1779 }
1780 return npages;
1781 }
1782
1783 static int bvec_npages(const struct iov_iter *i, int maxpages)
1784 {
1785 size_t skip = i->iov_offset, size = i->count;
1786 const struct bio_vec *p;
1787 int npages = 0;
1788
1789 for (p = i->bvec; size; skip = 0, p++) {
1790 unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
1791 size_t len = min(p->bv_len - skip, size);
1792
1793 size -= len;
1794 npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1795 if (unlikely(npages > maxpages))
1796 return maxpages;
1797 }
1798 return npages;
1799 }
1800
1801 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1802 {
1803 if (unlikely(!i->count))
1804 return 0;
1805 /* iovec and kvec have identical layouts */
1806 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1807 return iov_npages(i, maxpages);
1808 if (iov_iter_is_bvec(i))
1809 return bvec_npages(i, maxpages);
1810 if (iov_iter_is_pipe(i)) {
1811 unsigned int iter_head;
1812 int npages;
1813 size_t off;
1814
1815 if (!sanity(i))
1816 return 0;
1817
1818 data_start(i, &iter_head, &off);
1819 /* some of this one + all after this one */
1820 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1821 return min(npages, maxpages);
1822 }
1823 if (iov_iter_is_xarray(i)) {
1824 unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
1825 int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
1826 return min(npages, maxpages);
1827 }
1828 return 0;
1829 }
1830 EXPORT_SYMBOL(iov_iter_npages);
1831
1832 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1833 {
1834 *new = *old;
1835 if (unlikely(iov_iter_is_pipe(new))) {
1836 WARN_ON(1);
1837 return NULL;
1838 }
1839 if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1840 return NULL;
1841 if (iov_iter_is_bvec(new))
1842 return new->bvec = kmemdup(new->bvec,
1843 new->nr_segs * sizeof(struct bio_vec),
1844 flags);
1845 else
1846 /* iovec and kvec have identical layout */
1847 return new->iov = kmemdup(new->iov,
1848 new->nr_segs * sizeof(struct iovec),
1849 flags);
1850 }
1851 EXPORT_SYMBOL(dup_iter);
1852
1853 static int copy_compat_iovec_from_user(struct iovec *iov,
1854 const struct iovec __user *uvec, unsigned long nr_segs)
1855 {
1856 const struct compat_iovec __user *uiov =
1857 (const struct compat_iovec __user *)uvec;
1858 int ret = -EFAULT, i;
1859
1860 if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1861 return -EFAULT;
1862
1863 for (i = 0; i < nr_segs; i++) {
1864 compat_uptr_t buf;
1865 compat_ssize_t len;
1866
1867 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1868 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1869
1870 /* check for compat_size_t not fitting in compat_ssize_t .. */
1871 if (len < 0) {
1872 ret = -EINVAL;
1873 goto uaccess_end;
1874 }
1875 iov[i].iov_base = compat_ptr(buf);
1876 iov[i].iov_len = len;
1877 }
1878
1879 ret = 0;
1880 uaccess_end:
1881 user_access_end();
1882 return ret;
1883 }
1884
1885 static int copy_iovec_from_user(struct iovec *iov,
1886 const struct iovec __user *uvec, unsigned long nr_segs)
1887 {
1888 unsigned long seg;
1889
1890 if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1891 return -EFAULT;
1892 for (seg = 0; seg < nr_segs; seg++) {
1893 if ((ssize_t)iov[seg].iov_len < 0)
1894 return -EINVAL;
1895 }
1896
1897 return 0;
1898 }
1899
1900 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1901 unsigned long nr_segs, unsigned long fast_segs,
1902 struct iovec *fast_iov, bool compat)
1903 {
1904 struct iovec *iov = fast_iov;
1905 int ret;
1906
1907 /*
1908 * SuS says "The readv() function *may* fail if the iovcnt argument was
1909 * less than or equal to 0, or greater than {IOV_MAX}." Linux has
1910 * traditionally returned zero for zero segments, so...
1911 */
1912 if (nr_segs == 0)
1913 return iov;
1914 if (nr_segs > UIO_MAXIOV)
1915 return ERR_PTR(-EINVAL);
1916 if (nr_segs > fast_segs) {
1917 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1918 if (!iov)
1919 return ERR_PTR(-ENOMEM);
1920 }
1921
1922 if (compat)
1923 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1924 else
1925 ret = copy_iovec_from_user(iov, uvec, nr_segs);
1926 if (ret) {
1927 if (iov != fast_iov)
1928 kfree(iov);
1929 return ERR_PTR(ret);
1930 }
1931
1932 return iov;
1933 }
1934
1935 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1936 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1937 struct iov_iter *i, bool compat)
1938 {
1939 ssize_t total_len = 0;
1940 unsigned long seg;
1941 struct iovec *iov;
1942
1943 iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1944 if (IS_ERR(iov)) {
1945 *iovp = NULL;
1946 return PTR_ERR(iov);
1947 }
1948
1949 /*
1950 * According to the Single Unix Specification we should return EINVAL if
1951 * an element length is < 0 when cast to ssize_t or if the total length
1952 * would overflow the ssize_t return value of the system call.
1953 *
1954 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1955 * overflow case.
1956 */
1957 for (seg = 0; seg < nr_segs; seg++) {
1958 ssize_t len = (ssize_t)iov[seg].iov_len;
1959
1960 if (!access_ok(iov[seg].iov_base, len)) {
1961 if (iov != *iovp)
1962 kfree(iov);
1963 *iovp = NULL;
1964 return -EFAULT;
1965 }
1966
1967 if (len > MAX_RW_COUNT - total_len) {
1968 len = MAX_RW_COUNT - total_len;
1969 iov[seg].iov_len = len;
1970 }
1971 total_len += len;
1972 }
1973
1974 iov_iter_init(i, type, iov, nr_segs, total_len);
1975 if (iov == *iovp)
1976 *iovp = NULL;
1977 else
1978 *iovp = iov;
1979 return total_len;
1980 }
1981
1982 /**
1983 * import_iovec() - Copy an array of &struct iovec from userspace
1984 * into the kernel, check that it is valid, and initialize a new
1985 * &struct iov_iter iterator to access it.
1986 *
1987 * @type: One of %READ or %WRITE.
1988 * @uvec: Pointer to the userspace array.
1989 * @nr_segs: Number of elements in userspace array.
1990 * @fast_segs: Number of elements in the array pointed to by @iovp.
1991 * @iovp: (input and output parameter) Pointer to pointer to (usually small
1992 * on-stack) kernel array.
1993 * @i: Pointer to iterator that will be initialized on success.
1994 *
1995 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
1996 * then this function places %NULL in *@iovp on return. Otherwise, a new
1997 * array will be allocated and the result placed in *@iovp. This means that
1998 * the caller may call kfree() on *@iovp regardless of whether the small
1999 * on-stack array was used or not (and regardless of whether this function
2000 * returns an error or not).
2001 *
2002 * Return: Negative error code on error, bytes imported on success
2003 */
2004 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2005 unsigned nr_segs, unsigned fast_segs,
2006 struct iovec **iovp, struct iov_iter *i)
2007 {
2008 return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2009 in_compat_syscall());
2010 }
2011 EXPORT_SYMBOL(import_iovec);
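/*
 * Usage sketch (illustrative only, in the style of a readv()-like system
 * call; "do_read" stands in for the actual consumer of the iterator):
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_read(&iter);
 *	kfree(iov);		// safe whether or not the stack array was used
 *	return ret;
 */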
2012
2013 int import_single_range(int rw, void __user *buf, size_t len,
2014 struct iovec *iov, struct iov_iter *i)
2015 {
2016 if (len > MAX_RW_COUNT)
2017 len = MAX_RW_COUNT;
2018 if (unlikely(!access_ok(buf, len)))
2019 return -EFAULT;
2020
2021 iov->iov_base = buf;
2022 iov->iov_len = len;
2023 iov_iter_init(i, rw, iov, 1, len);
2024 return 0;
2025 }
2026 EXPORT_SYMBOL(import_single_range);
2027
2028 /**
2029 * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
2030 * iov_iter_save_state() was called.
2031 *
2032 * @i: &struct iov_iter to restore
2033 * @state: state to restore from
2034 *
2035 * Used after iov_iter_save_state() to restore @i, if operations may
2036 * have advanced it.
2037 *
2038 * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
2039 */
2040 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
2041 {
2042 if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
2043 !iov_iter_is_kvec(i)))
2044 return;
2045 i->iov_offset = state->iov_offset;
2046 i->count = state->count;
2047 /*
2048 * For the *vec iters, nr_segs + iov is constant - if we increment
2049 * the vec, then we also decrement the nr_segs count. Hence we don't
2050 * need to track both of these, just one is enough and we can derive
2051 * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
2052 * size, so we can just increment the iov pointer as they are unionized.
2053 * ITER_BVEC _may_ be the same size on some archs, but on others it is
2054 * not. Be safe and handle it separately.
2055 */
2056 BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
2057 if (iov_iter_is_bvec(i))
2058 i->bvec -= state->nr_segs - i->nr_segs;
2059 else
2060 i->iov -= state->nr_segs - i->nr_segs;
2061 i->nr_segs = state->nr_segs;
2062 }
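/*
 * Usage sketch (illustrative only; "do_io" and "need_retry" are
 * stand-ins): snapshot the iterator before an operation that may advance
 * it, and roll back with iov_iter_restore() if it has to be retried.
 *
 *	struct iov_iter_state state;
 *
 *	iov_iter_save_state(iter, &state);
 *	ret = do_io(iter);
 *	if (need_retry(ret))
 *		iov_iter_restore(iter, &state);
 */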
2063