1 /*
2  *   Copyright (C) International Business Machines Corp., 2000-2004
3  *
4  *   This program is free software;  you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation; either version 2 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12  *   the GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program;  if not, write to the Free Software
16  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18 /*
19  *      jfs_xtree.c: extent allocation descriptor B+-tree manager
20  */
21 
22 #include <linux/fs.h>
23 #include "jfs_incore.h"
24 #include "jfs_filsys.h"
25 #include "jfs_metapage.h"
26 #include "jfs_dmap.h"
27 #include "jfs_dinode.h"
28 #include "jfs_superblock.h"
29 #include "jfs_debug.h"
30 
31 /*
32  * xtree local flag
33  */
34 #define XT_INSERT       0x00000001
35 
/*
 *       xtree key/entry comparison: extent offset
 *
 * return:
 *      -1: k < start of extent
 *       0: start_of_extent <= k <= end_of_extent
 *       1: k > end_of_extent
 *
 * note: wrapped in do { } while (0) so the macro expands to a single
 * statement and composes safely with a trailing ';' in if/else bodies.
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
do {\
        OFFSET64 = offsetXAD(X);\
        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
              ((K) < OFFSET64) ? -1 : 0;\
} while (0)
50 
/* write a xad entry: set flag, offset, length and address of (XAD);
 * wrapped in do { } while (0) so the macro is a single statement and is
 * safe inside unbraced if/else bodies (kernel multi-statement-macro idiom)
 */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
do {\
        (XAD)->flag = (FLAG);\
        XADoffset((XAD), (OFF));\
        XADlength((XAD), (LEN));\
        XADaddress((XAD), (ADDR));\
} while (0)
59 
/* map metapage MP to its xtree page; the i_xtroot argument tells BT_PAGE
 * where the in-inode root page lives (see BT_PAGE) */
#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
61 
/* get page buffer for specified block address */
/* ToDo: Replace this ugly macro with a function */
/*
 * XT_GETPAGE(IP, BN, MP, SIZE, P, RC):
 *	read/pin the xtree page of inode IP at block BN into metapage MP
 *	with mapped page P (via BT_GETPAGE), then sanity-check the page
 *	header: nextindex must lie in [XTENTRYSTART, maxentry], and
 *	maxentry must not exceed the slot capacity (XTROOTMAXSLOT for the
 *	root page BN == 0, PSIZE>>L2XTSLOTSIZE otherwise).  On a corrupt
 *	page: report via jfs_error(), release the page, and set RC = -EIO.
 */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
	if (!(RC))\
	{\
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
		    (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
		    (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
		{\
			jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
			BT_PUTPAGE(MP);\
			MP = NULL;\
			RC = -EIO;\
		}\
        }\
}
80 
/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

/* unpack (bn, metapage, page, index) of the btstack frame left on top
 * by a preceding xtSearch(); the referenced page remains pinned */
#define XT_GETSEARCH(IP, LEAF, BN, MP,  P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
/* xtree entry parameter descriptor */
struct xtsplit {
	struct metapage *mp;	/* pinned page to be split */
	s16 index;		/* index at which to insert the new entry */
	u8 flag;		/* xad flag for the new entry */
	s64 off;		/* extent offset of the new entry */
	s64 addr;		/* extent address of the new entry */
	int len;		/* extent length of the new entry */
	struct pxdlist *pxdlist;	/* preallocated blocks for split pages;
					 * NULL lets xtSplitUp() allocate them */
};
96 
97 
/*
 *      statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;		/* xtSearch() invocations */
	uint fastSearch;	/* searches satisfied by the sequential-access
				 * heuristic (no binary-search descent) */
	uint split;		/* page splits — presumably bumped by the split
				 * routines; increment not visible here */
} xtStat;
#endif
108 
109 
110 /*
111  * forward references
112  */
113 static int xtSearch(struct inode *ip,
114 		    s64 xoff, int *cmpp, struct btstack * btstack, int flag);
115 
116 static int xtSplitUp(tid_t tid,
117 		     struct inode *ip,
118 		     struct xtsplit * split, struct btstack * btstack);
119 
120 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
121 		       struct metapage ** rmpp, s64 * rbnp);
122 
123 static int xtSplitRoot(tid_t tid, struct inode *ip,
124 		       struct xtsplit * split, struct metapage ** rmpp);
125 
126 #ifdef _STILL_TO_PORT
127 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
128 		      xtpage_t * fp, struct btstack * btstack);
129 
130 static int xtSearchNode(struct inode *ip,
131 			xad_t * xad,
132 			int *cmpp, struct btstack * btstack, int flag);
133 
134 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
135 #endif				/*  _STILL_TO_PORT */
136 
137 /* External references */
138 
139 /*
140  *      debug control
141  */
142 /*      #define _JFS_DEBUG_XTREE        1 */
143 
144 
145 /*
146  *      xtLookup()
147  *
148  * function: map a single page into a physical extent;
149  */
xtLookup(struct inode * ip,s64 lstart,s64 llen,int * pflag,s64 * paddr,s32 * plen,int no_check)150 int xtLookup(struct inode *ip, s64 lstart,
151 	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
152 {
153 	int rc = 0;
154 	struct btstack btstack;
155 	int cmp;
156 	s64 bn;
157 	struct metapage *mp;
158 	xtpage_t *p;
159 	int index;
160 	xad_t *xad;
161 	s64 size, xoff, xend;
162 	int xlen;
163 	s64 xaddr;
164 
165 	*plen = 0;
166 
167 	if (!no_check) {
168 		/* is lookup offset beyond eof ? */
169 		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
170 		    JFS_SBI(ip->i_sb)->l2bsize;
171 		if (lstart >= size) {
172 			jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
173 				(ulong) lstart, (ulong) size);
174 			return 0;
175 		}
176 	}
177 
178 	/*
179 	 * search for the xad entry covering the logical extent
180 	 */
181 //search:
182 	if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) {
183 		jfs_err("xtLookup: xtSearch returned %d", rc);
184 		return rc;
185 	}
186 
187 	/*
188 	 *      compute the physical extent covering logical extent
189 	 *
190 	 * N.B. search may have failed (e.g., hole in sparse file),
191 	 * and returned the index of the next entry.
192 	 */
193 	/* retrieve search result */
194 	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
195 
196 	/* is xad found covering start of logical extent ?
197 	 * lstart is a page start address,
198 	 * i.e., lstart cannot start in a hole;
199 	 */
200 	if (cmp)
201 		goto out;
202 
203 	/*
204 	 * lxd covered by xad
205 	 */
206 	xad = &p->xad[index];
207 	xoff = offsetXAD(xad);
208 	xlen = lengthXAD(xad);
209 	xend = xoff + xlen;
210 	xaddr = addressXAD(xad);
211 
212 	/* initialize new pxd */
213 	*pflag = xad->flag;
214 	*paddr = xaddr + (lstart - xoff);
215 	/* a page must be fully covered by an xad */
216 	*plen = min(xend - lstart, llen);
217 
218       out:
219 	XT_PUTPAGE(mp);
220 
221 	return rc;
222 }
223 
224 
225 /*
226  *      xtLookupList()
227  *
228  * function: map a single logical extent into a list of physical extent;
229  *
230  * parameter:
231  *      struct inode    *ip,
232  *      struct lxdlist  *lxdlist,       lxd list (in)
233  *      struct xadlist  *xadlist,       xad list (in/out)
234  *      int		flag)
235  *
236  * coverage of lxd by xad under assumption of
237  * . lxd's are ordered and disjoint.
238  * . xad's are ordered and disjoint.
239  *
240  * return:
241  *      0:      success
242  *
243  * note: a page being written (even a single byte) is backed fully,
244  *      except the last page which is only backed with blocks
245  *      required to cover the last byte;
246  *      the extent backing a page is fully contained within an xad;
247  */
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
		 struct xadlist * xadlist, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	lxd_t *lxd;
	xad_t *xad, *pxd;
	s64 size, lstart, lend, xstart, xend, pstart;
	s64 llen, xlen, plen;
	s64 xaddr, paddr;
	int nlxd, npxd, maxnpxd;	/* lxd's remaining / pxd's emitted /
					 * capacity of caller's xadlist */

	npxd = xadlist->nxad = 0;
	maxnpxd = xadlist->maxnxad;
	pxd = xadlist->xad;

	nlxd = lxdlist->nlxd;
	lxd = lxdlist->lxd;

	/* prime the current lxd cursor [lstart, lend) */
	lstart = offsetLXD(lxd);
	llen = lengthLXD(lxd);
	lend = lstart + llen;

	/* file size in allocation blocks; offsets at/beyond EOF end the map */
	size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/*
	 * search for the xad entry covering the logical extent
	 */
      search:
	if (lstart >= size)
		return 0;

	if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0)))
		return rc;

	/*
	 *      compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
//map:
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad on the next sibling page ? */
	if (index == le16_to_cpu(p->header.nextindex)) {
		if (p->header.flag & BT_ROOT)
			goto mapend;

		if ((bn = le64_to_cpu(p->header.next)) == 0)
			goto mapend;

		XT_PUTPAGE(mp);

		/* get next sibling page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = XTENTRYSTART;
	}

	xad = &p->xad[index];

	/*
	 * is lxd covered by xad ?
	 */
      compare:
	/* load the current xad cursor [xstart, xend) */
	xstart = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xstart + xlen;
	xaddr = addressXAD(xad);

      compare1:
	if (xstart < lstart)
		goto compare2;

	/* (lstart <= xstart) */

	/* lxd is NOT covered by xad */
	if (lend <= xstart) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/* compare with the current xad  */
		goto compare1;
	}
	/* lxd is covered by xad */
	else {			/* (xstart < lend) */

		/* initialize new pxd */
		pstart = xstart;
		plen = min(lend - xstart, xlen);
		paddr = xaddr;

		goto cover;
	}

	/* (xstart < lstart) */
      compare2:
	/* lxd is covered by xad */
	if (lstart < xend) {
		/* initialize new pxd */
		pstart = lstart;
		plen = min(xend - lstart, llen);
		paddr = xaddr + (lstart - xstart);

		goto cover;
	}
	/* lxd is NOT covered by xad */
	else {			/* (xend <= lstart) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then tree search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			/* unpin this page and restart with a tree search */
			XT_PUTPAGE(mp);
			goto search;
		} else {
			index++;
			xad++;

			/* compare with new xad */
			goto compare;
		}
	}

	/*
	 * lxd is covered by xad and a new pxd has been initialized
	 * (lstart <= xstart < lend) or (xstart < lstart < xend)
	 */
      cover:
	/* finalize pxd corresponding to current xad */
	XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);

	if (++npxd >= maxnpxd)
		goto mapend;
	pxd++;

	/*
	 * lxd is fully covered by xad
	 */
	if (lend <= xend) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/*
		 * test for old xad covering new lxd
		 * (old xstart < new lstart)
		 */
		goto compare2;
	}
	/*
	 * lxd is partially covered by xad
	 */
	else {			/* (xend < lend)  */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then next xad page search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			if ((bn = le64_to_cpu(p->header.next)) == 0)
				goto mapend;

			XT_PUTPAGE(mp);

			/* get next sibling page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			index = XTENTRYSTART;
			xad = &p->xad[index];
		} else {
			index++;
			xad++;
		}

		/*
		 * test for new xad covering old lxd
		 * (old lstart < new xstart)
		 */
		goto compare;
	}

      mapend:
	/* record how many pxd's were actually produced */
	xadlist->nxad = npxd;

//out:
	XT_PUTPAGE(mp);

	return rc;
}
480 
481 
482 /*
483  *      xtSearch()
484  *
485  * function:    search for the xad entry covering specified offset.
486  *
487  * parameters:
488  *      ip      - file object;
489  *      xoff    - extent offset;
490  *      cmpp    - comparison result:
491  *      btstack - traverse stack;
492  *      flag    - search process flag (XT_INSERT);
493  *
494  * returns:
495  *      btstack contains (bn, index) of search path traversed to the entry.
496  *      *cmpp is set to result of comparison with the entry returned.
497  *      the page containing the entry is pinned at exit.
498  */
static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	/* no split pages anticipated yet */
	btstack->nsplit = 0;

	/*
	 *      search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					/* hit: same entry as last access */
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *      search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}

				/* search hit - internal page:
				 * descend/search its child page
				 */
				goto next;
			}

			/* key above this entry: continue in the upper half */
			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *      search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
760 
/*
 *      xtInsert()
 *
 * function:
 *      insert a new xad entry (xoff, xlen, xaddr) into the xtree of ip,
 *      allocating the data extent first when the caller did not, and
 *      splitting a full leaf page via xtSplitUp() when necessary;
 *
 * parameter:
 *      tid     - transaction id;
 *      ip      - file object;
 *      xflag   - extent flag (XAD_NOTRECORDED):
 *      xoff    - extent offset;
 *      xlen    - extent length;
 *      xaddrp  - extent address pointer (in/out):
 *              if (*xaddrp)
 *                      caller allocated data extent at *xaddrp;
 *              else
 *                      allocate data extent and return its xaddr;
 *      flag    -
 *
 * return:
 *      0 on success; -EEXIST if an entry already covers xoff; otherwise
 *      errors from xtSearch()/dbAlloc()/xtSplitUp();
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 *      search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			/* hint = last block of the preceding extent */
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr)))
			goto out;
	}

	/*
	 *      insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *      if the leaf page is full, split the page and
	 *      propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;	/* let xtSplitUp() allocate split pages */
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0)
				dbFree(ip, xaddr, (s64) xlen);
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 *      insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
909 

/*
 *      xtSplitUp()
 *
 * function:
 *      split full pages as propagating insertion up the tree
 *
 * parameter:
 *      tid     - transaction id;
 *      ip      - file object;
 *      split   - entry parameter descriptor;
 *      btstack - traverse stack from xtSearch()
 *
 * return:
 *      0 on success; -EIO if the initial leaf/root split fails;
 *      otherwise errors from dbAlloc()/XT_GETPAGE()/the split routines.
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ? */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		/* grow the in-inode root into the inline-EA area instead
		 * of splitting; the inline EA space is given up for good */
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		sp->header.nextindex =
		    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;	/* computed by xtSearch() */
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	/* NOTE(review): the split routine's rc is collapsed to -EIO here */
	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index. */
			sp->header.nextindex =
			    cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
					1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}
1178 
1179 
1180 /*
1181  *      xtSplitPage()
1182  *
1183  * function:
1184  *      split a full non-root page into
1185  *      original/split/left page and new right page
1186  *      i.e., the original/split page remains as left page.
1187  *
1188  * parameter:
1189  *      int		tid,
1190  *      struct inode    *ip,
1191  *      struct xtsplit  *split,
1192  *      struct metapage	**rmpp,
1193  *      u64		*rbnp,
1194  *
 * return:
 *      0 on success, -errno on failure; on success the new right page
 *      is returned pinned via *rmpp and its block number via *rbnp.
1197  */
1198 static int
xtSplitPage(tid_t tid,struct inode * ip,struct xtsplit * split,struct metapage ** rmpp,s64 * rbnp)1199 xtSplitPage(tid_t tid, struct inode *ip,
1200 	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
1201 {
1202 	int rc = 0;
1203 	struct metapage *smp;
1204 	xtpage_t *sp;
1205 	struct metapage *rmp;
1206 	xtpage_t *rp;		/* new right page allocated */
1207 	s64 rbn;		/* new right page block number */
1208 	struct metapage *mp;
1209 	xtpage_t *p;
1210 	s64 nextbn;
1211 	int skip, maxentry, middle, righthalf, n;
1212 	xad_t *xad;
1213 	struct pxdlist *pxdlist;
1214 	pxd_t *pxd;
1215 	struct tlock *tlck;
1216 	struct xtlock *sxtlck = 0, *rxtlck = 0;
1217 
1218 	smp = split->mp;
1219 	sp = XT_PAGE(ip, smp);
1220 
1221 	INCREMENT(xtStat.split);
1222 
1223 	/*
1224 	 * allocate the new right page for the split
1225 	 */
1226 	pxdlist = split->pxdlist;
1227 	pxd = &pxdlist->pxd[pxdlist->npxd];
1228 	pxdlist->npxd++;
1229 	rbn = addressPXD(pxd);
1230 	rmp = get_metapage(ip, rbn, PSIZE, 1);
1231 	if (rmp == NULL)
1232 		return -EIO;
1233 
1234 	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1235 
1236 	BT_MARK_DIRTY(rmp, ip);
1237 	/*
1238 	 * action: new page;
1239 	 */
1240 
1241 	rp = (xtpage_t *) rmp->data;
1242 	rp->header.self = *pxd;
1243 	rp->header.flag = sp->header.flag & BT_TYPE;
1244 	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
1245 	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1246 
1247 	BT_MARK_DIRTY(smp, ip);
1248 	/* Don't log it if there are no links to the file */
1249 	if (!test_cflag(COMMIT_Nolink, ip)) {
1250 		/*
1251 		 * acquire a transaction lock on the new right page;
1252 		 */
1253 		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1254 		rxtlck = (struct xtlock *) & tlck->lock;
1255 		rxtlck->lwm.offset = XTENTRYSTART;
1256 		/*
1257 		 * acquire a transaction lock on the split page
1258 		 */
1259 		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1260 		sxtlck = (struct xtlock *) & tlck->lock;
1261 	}
1262 
1263 	/*
1264 	 * initialize/update sibling pointers of <sp> and <rp>
1265 	 */
1266 	nextbn = le64_to_cpu(sp->header.next);
1267 	rp->header.next = cpu_to_le64(nextbn);
1268 	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1269 	sp->header.next = cpu_to_le64(rbn);
1270 
1271 	skip = split->index;
1272 
1273 	/*
1274 	 *      sequential append at tail (after last entry of last page)
1275 	 *
1276 	 * if splitting the last page on a level because of appending
1277 	 * a entry to it (skip is maxentry), it's likely that the access is
1278 	 * sequential. adding an empty page on the side of the level is less
1279 	 * work and can push the fill factor much higher than normal.
1280 	 * if we're wrong it's no big deal -  we will do the split the right
1281 	 * way next time.
1282 	 * (it may look like it's equally easy to do a similar hack for
1283 	 * reverse sorted data, that is, split the tree left, but it's not.
1284 	 * Be my guest.)
1285 	 */
1286 	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1287 		/*
1288 		 * acquire a transaction lock on the new/right page;
1289 		 *
1290 		 * action: xad insertion;
1291 		 */
1292 		/* insert entry at the first entry of the new right page */
1293 		xad = &rp->xad[XTENTRYSTART];
1294 		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1295 			    split->addr);
1296 
1297 		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1298 
1299 		if (!test_cflag(COMMIT_Nolink, ip)) {
1300 			/* rxtlck->lwm.offset = XTENTRYSTART; */
1301 			rxtlck->lwm.length = 1;
1302 		}
1303 
1304 		*rmpp = rmp;
1305 		*rbnp = rbn;
1306 
1307 		ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
1308 
1309 		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1310 		return 0;
1311 	}
1312 
1313 	/*
1314 	 *      non-sequential insert (at possibly middle page)
1315 	 */
1316 
1317 	/*
1318 	 * update previous pointer of old next/right page of <sp>
1319 	 */
1320 	if (nextbn != 0) {
1321 		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1322 		if (rc) {
1323 			XT_PUTPAGE(rmp);
1324 			return rc;
1325 		}
1326 
1327 		BT_MARK_DIRTY(mp, ip);
1328 		/*
1329 		 * acquire a transaction lock on the next page;
1330 		 *
1331 		 * action:sibling pointer update;
1332 		 */
1333 		if (!test_cflag(COMMIT_Nolink, ip))
1334 			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1335 
1336 		p->header.prev = cpu_to_le64(rbn);
1337 
1338 		/* sibling page may have been updated previously, or
1339 		 * it may be updated later;
1340 		 */
1341 
1342 		XT_PUTPAGE(mp);
1343 	}
1344 
1345 	/*
1346 	 * split the data between the split and new/right pages
1347 	 */
1348 	maxentry = le16_to_cpu(sp->header.maxentry);
1349 	middle = maxentry >> 1;
1350 	righthalf = maxentry - middle;
1351 
1352 	/*
1353 	 * skip index in old split/left page - insert into left page:
1354 	 */
1355 	if (skip <= middle) {
1356 		/* move right half of split page to the new right page */
1357 		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1358 			righthalf << L2XTSLOTSIZE);
1359 
1360 		/* shift right tail of left half to make room for new entry */
1361 		if (skip < middle)
1362 			memmove(&sp->xad[skip + 1], &sp->xad[skip],
1363 				(middle - skip) << L2XTSLOTSIZE);
1364 
1365 		/* insert new entry */
1366 		xad = &sp->xad[skip];
1367 		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1368 			    split->addr);
1369 
1370 		/* update page header */
1371 		sp->header.nextindex = cpu_to_le16(middle + 1);
1372 		if (!test_cflag(COMMIT_Nolink, ip)) {
1373 			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1374 			    min(skip, (int)sxtlck->lwm.offset) : skip;
1375 		}
1376 
1377 		rp->header.nextindex =
1378 		    cpu_to_le16(XTENTRYSTART + righthalf);
1379 	}
1380 	/*
1381 	 * skip index in new right page - insert into right page:
1382 	 */
1383 	else {
1384 		/* move left head of right half to right page */
1385 		n = skip - middle;
1386 		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1387 			n << L2XTSLOTSIZE);
1388 
1389 		/* insert new entry */
1390 		n += XTENTRYSTART;
1391 		xad = &rp->xad[n];
1392 		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1393 			    split->addr);
1394 
1395 		/* move right tail of right half to right page */
1396 		if (skip < maxentry)
1397 			memmove(&rp->xad[n + 1], &sp->xad[skip],
1398 				(maxentry - skip) << L2XTSLOTSIZE);
1399 
1400 		/* update page header */
1401 		sp->header.nextindex = cpu_to_le16(middle);
1402 		if (!test_cflag(COMMIT_Nolink, ip)) {
1403 			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1404 			    min(middle, (int)sxtlck->lwm.offset) : middle;
1405 		}
1406 
1407 		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1408 						   righthalf + 1);
1409 	}
1410 
1411 	if (!test_cflag(COMMIT_Nolink, ip)) {
1412 		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1413 		    sxtlck->lwm.offset;
1414 
1415 		/* rxtlck->lwm.offset = XTENTRYSTART; */
1416 		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1417 		    XTENTRYSTART;
1418 	}
1419 
1420 	*rmpp = rmp;
1421 	*rbnp = rbn;
1422 
1423 	ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
1424 
1425 	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1426 	return rc;
1427 }
1428 
1429 
1430 /*
1431  *      xtSplitRoot()
1432  *
1433  * function:
1434  *      split the full root page into
1435  *      original/root/split page and new right page
1436  *      i.e., root remains fixed in tree anchor (inode) and
1437  *      the root is copied to a single new right child page
1438  *      since root page << non-root page, and
1439  *      the split root page contains a single entry for the
1440  *      new right child page.
1441  *
1442  * parameter:
1443  *      int		tid,
1444  *      struct inode    *ip,
1445  *      struct xtsplit  *split,
1446  *      struct metapage	**rmpp)
1447  *
 * return:
 *      0 on success, -errno on failure; on success the new right child
 *      page is returned pinned via *rmpp.
1450  */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;		/* root page (in-line in the inode) */
	struct metapage *rmp;
	xtpage_t *rp;		/* new right child page */
	s64 rbn;		/* block number of the new child page */
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/* the root lives in-line in the in-memory inode */
	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 *      allocate a single (right) child page
	 *
	 * block extent was preallocated by the caller in split->pxdlist
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	/* child keeps the leaf/internal type of the old root */
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	/* non-root page: a full PSIZE page worth of xad slots */
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 *      reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which force the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	/* single router entry pointing at the new child */
	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root: root is now an internal page */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	/* return the new child page pinned to the caller */
	*rmpp = rmp;

	/* account the newly allocated child page extent */
	ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
1569 
1570 
1571 /*
1572  *      xtExtend()
1573  *
1574  * function: extend in-place;
1575  *
1576  * note: existing extent may or may not have been committed.
1577  * caller is responsible for pager buffer cache update, and
1578  * working block allocation map update;
1579  * update pmap: alloc whole extended extent;
1580  */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)		/* currently unused by this function */
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	/* use NULL, not 0, for pointer initializers (kernel style/sparse) */
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist extent to be extended */
	if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 *      extent overflow: insert entry for new extent
	 *
	 * the old extent is capped at MAXXLEN and the overflow <len>
	 * blocks go into a new, physically contiguous XAD.
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 *      if the leaf page is full, insert the new entry and
	 *      propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *      insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/* get back old entry: the old extent is capped at MAXXLEN */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
1730 
1731 #ifdef _NOTYET
1732 /*
1733  *      xtTailgate()
1734  *
1735  * function: split existing 'tail' extent
1736  *      (split offset >= start offset of tail extent), and
1737  *      relocate and extend the split tail half;
1738  *
1739  * note: existing extent may or may not have been committed.
1740  * caller is responsible for pager buffer cache update, and
1741  * working block allocation map update;
1742  * update pmap: free old split tail extent, alloc new extent;
1743  */
int xtTailgate(tid_t tid,		/* transaction id */
	       struct inode *ip, s64 xoff,	/* split/new extent offset */
	       s32 xlen,	/* new extent length */
	       s64 xaddr,	/* new extent address */
	       int flag)	/* currently unused by this function */
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, llen, rlen;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	struct tlock *tlck;
	/* use NULL, not 0, for pointer initializers (kernel style/sparse) */
	struct xtlock *xtlck = NULL;
	struct tlock *mtlck;
	struct maplock *pxdlock;

/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
        (ulong)xoff, xlen, (ulong)xaddr);
*/

	/* there must exist extent to be tailgated */
	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
		return -EIO;
	}

	/* entry found must be last entry */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (index != nextindex - 1) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtTailgate: the entry found is not the last entry");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* completely replace extent ? */
	xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
	/* <llen> blocks of the old extent precede the split offset */
	if ((llen = xoff - offsetXAD(xad)) == 0)
		goto updateOld;

	/*
	 *      partially replace extent: insert entry for new extent
	 */
//insertNew:
	/*
	 *      if the leaf page is full, insert the new entry and
	 *      propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *      insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

		/* advance next available entry index */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/* get back old XAD */
	xad = &p->xad[index];

	/*
	 * truncate/relocate old extent at split offset
	 */
      updateOld:
	/* update dmap for old/committed/truncated extent */
	rlen = lengthXAD(xad) - llen;
	if (!(xad->flag & XAD_NEW)) {
		/* free from PWMAP at commit */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			mtlck = txMaplock(tid, ip, tlckMAP);
			pxdlock = (struct maplock *) & mtlck->lock;
			pxdlock->flag = mlckFREEPXD;
			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
			PXDlength(&pxdlock->pxd, rlen);
			pxdlock->index = 1;
		}
	} else
		/* free from WMAP */
		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);

	if (llen)
		/* truncate */
		XADlength(xad, llen);
	else
		/* replace */
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index, (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
1914 #endif /* _NOTYET */
1915 
1916 /*
1917  *      xtUpdate()
1918  *
1919  * function: update XAD;
1920  *
1921  *      update extent for allocated_but_not_recorded or
1922  *      compressed extent;
1923  *
1924  * parameter:
1925  *      nxad    - new XAD;
1926  *                logical extent of the specified XAD must be completely
1927  *                contained by an existing XAD;
1928  */
xtUpdate(tid_t tid,struct inode * ip,xad_t * nxad)1929 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1930 {				/* new XAD */
1931 	int rc = 0;
1932 	int cmp;
1933 	struct metapage *mp;	/* meta-page buffer */
1934 	xtpage_t *p;		/* base B+-tree index page */
1935 	s64 bn;
1936 	int index0, index, newindex, nextindex;
1937 	struct btstack btstack;	/* traverse stack */
1938 	struct xtsplit split;	/* split information */
1939 	xad_t *xad, *lxad, *rxad;
1940 	int xflag;
1941 	s64 nxoff, xoff;
1942 	int nxlen, xlen, lxlen, rxlen;
1943 	s64 nxaddr, xaddr;
1944 	struct tlock *tlck;
1945 	struct xtlock *xtlck = 0;
1946 	int newpage = 0;
1947 
1948 	/* there must exist extent to be tailgated */
1949 	nxoff = offsetXAD(nxad);
1950 	nxlen = lengthXAD(nxad);
1951 	nxaddr = addressXAD(nxad);
1952 
1953 	if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
1954 		return rc;
1955 
1956 	/* retrieve search result */
1957 	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1958 
1959 	if (cmp != 0) {
1960 		XT_PUTPAGE(mp);
1961 		jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
1962 		return -EIO;
1963 	}
1964 
1965 	BT_MARK_DIRTY(mp, ip);
1966 	/*
1967 	 * acquire tlock of the leaf page containing original entry
1968 	 */
1969 	if (!test_cflag(COMMIT_Nolink, ip)) {
1970 		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1971 		xtlck = (struct xtlock *) & tlck->lock;
1972 	}
1973 
1974 	xad = &p->xad[index0];
1975 	xflag = xad->flag;
1976 	xoff = offsetXAD(xad);
1977 	xlen = lengthXAD(xad);
1978 	xaddr = addressXAD(xad);
1979 
1980 	/* nXAD must be completely contained within XAD */
1981 	if ((xoff > nxoff) ||
1982 	    (nxoff + nxlen > xoff + xlen)) {
1983 		XT_PUTPAGE(mp);
1984 		jfs_error(ip->i_sb,
1985 			  "xtUpdate: nXAD in not completely contained within XAD");
1986 		return -EIO;
1987 	}
1988 
1989 	index = index0;
1990 	newindex = index + 1;
1991 	nextindex = le16_to_cpu(p->header.nextindex);
1992 
1993 #ifdef  _JFS_WIP_NOCOALESCE
1994 	if (xoff < nxoff)
1995 		goto updateRight;
1996 
1997 	/*
1998 	 * replace XAD with nXAD
1999 	 */
2000       replace:			/* (nxoff == xoff) */
2001 	if (nxlen == xlen) {
2002 		/* replace XAD with nXAD:recorded */
2003 		*xad = *nxad;
2004 		xad->flag = xflag & ~XAD_NOTRECORDED;
2005 
2006 		goto out;
2007 	} else			/* (nxlen < xlen) */
2008 		goto updateLeft;
2009 #endif				/* _JFS_WIP_NOCOALESCE */
2010 
2011 /* #ifdef _JFS_WIP_COALESCE */
2012 	if (xoff < nxoff)
2013 		goto coalesceRight;
2014 
2015 	/*
2016 	 * coalesce with left XAD
2017 	 */
2018 //coalesceLeft: /* (xoff == nxoff) */
2019 	/* is XAD first entry of page ? */
2020 	if (index == XTENTRYSTART)
2021 		goto replace;
2022 
2023 	/* is nXAD logically and physically contiguous with lXAD ? */
2024 	lxad = &p->xad[index - 1];
2025 	lxlen = lengthXAD(lxad);
2026 	if (!(lxad->flag & XAD_NOTRECORDED) &&
2027 	    (nxoff == offsetXAD(lxad) + lxlen) &&
2028 	    (nxaddr == addressXAD(lxad) + lxlen) &&
2029 	    (lxlen + nxlen < MAXXLEN)) {
2030 		/* extend right lXAD */
2031 		index0 = index - 1;
2032 		XADlength(lxad, lxlen + nxlen);
2033 
2034 		/* If we just merged two extents together, need to make sure the
2035 		 * right extent gets logged.  If the left one is marked XAD_NEW,
2036 		 * then we know it will be logged.  Otherwise, mark as
2037 		 * XAD_EXTENDED
2038 		 */
2039 		if (!(lxad->flag & XAD_NEW))
2040 			lxad->flag |= XAD_EXTENDED;
2041 
2042 		if (xlen > nxlen) {
2043 			/* truncate XAD */
2044 			XADoffset(xad, xoff + nxlen);
2045 			XADlength(xad, xlen - nxlen);
2046 			XADaddress(xad, xaddr + nxlen);
2047 			goto out;
2048 		} else {	/* (xlen == nxlen) */
2049 
2050 			/* remove XAD */
2051 			if (index < nextindex - 1)
2052 				memmove(&p->xad[index], &p->xad[index + 1],
2053 					(nextindex - index -
2054 					 1) << L2XTSLOTSIZE);
2055 
2056 			p->header.nextindex =
2057 			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2058 					1);
2059 
2060 			index = index0;
2061 			newindex = index + 1;
2062 			nextindex = le16_to_cpu(p->header.nextindex);
2063 			xoff = nxoff = offsetXAD(lxad);
2064 			xlen = nxlen = lxlen + nxlen;
2065 			xaddr = nxaddr = addressXAD(lxad);
2066 			goto coalesceRight;
2067 		}
2068 	}
2069 
2070 	/*
2071 	 * replace XAD with nXAD
2072 	 */
2073       replace:			/* (nxoff == xoff) */
2074 	if (nxlen == xlen) {
2075 		/* replace XAD with nXAD:recorded */
2076 		*xad = *nxad;
2077 		xad->flag = xflag & ~XAD_NOTRECORDED;
2078 
2079 		goto coalesceRight;
2080 	} else			/* (nxlen < xlen) */
2081 		goto updateLeft;
2082 
2083 	/*
2084 	 * coalesce with right XAD
2085 	 */
2086       coalesceRight:		/* (xoff <= nxoff) */
2087 	/* is XAD last entry of page ? */
2088 	if (newindex == nextindex) {
2089 		if (xoff == nxoff)
2090 			goto out;
2091 		goto updateRight;
2092 	}
2093 
2094 	/* is nXAD logically and physically contiguous with rXAD ? */
2095 	rxad = &p->xad[index + 1];
2096 	rxlen = lengthXAD(rxad);
2097 	if (!(rxad->flag & XAD_NOTRECORDED) &&
2098 	    (nxoff + nxlen == offsetXAD(rxad)) &&
2099 	    (nxaddr + nxlen == addressXAD(rxad)) &&
2100 	    (rxlen + nxlen < MAXXLEN)) {
2101 		/* extend left rXAD */
2102 		XADoffset(rxad, nxoff);
2103 		XADlength(rxad, rxlen + nxlen);
2104 		XADaddress(rxad, nxaddr);
2105 
2106 		/* If we just merged two extents together, need to make sure
2107 		 * the left extent gets logged.  If the right one is marked
2108 		 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
2109 		 * XAD_EXTENDED
2110 		 */
2111 		if (!(rxad->flag & XAD_NEW))
2112 			rxad->flag |= XAD_EXTENDED;
2113 
2114 		if (xlen > nxlen)
2115 			/* truncate XAD */
2116 			XADlength(xad, xlen - nxlen);
2117 		else {		/* (xlen == nxlen) */
2118 
2119 			/* remove XAD */
2120 			memmove(&p->xad[index], &p->xad[index + 1],
2121 				(nextindex - index - 1) << L2XTSLOTSIZE);
2122 
2123 			p->header.nextindex =
2124 			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2125 					1);
2126 		}
2127 
2128 		goto out;
2129 	} else if (xoff == nxoff)
2130 		goto out;
2131 
2132 	if (xoff >= nxoff) {
2133 		XT_PUTPAGE(mp);
2134 		jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
2135 		return -EIO;
2136 	}
2137 /* #endif _JFS_WIP_COALESCE */
2138 
2139 	/*
2140 	 * split XAD into (lXAD, nXAD):
2141 	 *
2142 	 *          |---nXAD--->
2143 	 * --|----------XAD----------|--
2144 	 *   |-lXAD-|
2145 	 */
2146       updateRight:		/* (xoff < nxoff) */
2147 	/* truncate old XAD as lXAD:not_recorded */
2148 	xad = &p->xad[index];
2149 	XADlength(xad, nxoff - xoff);
2150 
2151 	/* insert nXAD:recorded */
2152 	if (nextindex == le16_to_cpu(p->header.maxentry)) {
2153 
2154 		/* xtSpliUp() unpins leaf pages */
2155 		split.mp = mp;
2156 		split.index = newindex;
2157 		split.flag = xflag & ~XAD_NOTRECORDED;
2158 		split.off = nxoff;
2159 		split.len = nxlen;
2160 		split.addr = nxaddr;
2161 		split.pxdlist = NULL;
2162 		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2163 			return rc;
2164 
2165 		/* get back old page */
2166 		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2167 		if (rc)
2168 			return rc;
2169 		/*
2170 		 * if leaf root has been split, original root has been
2171 		 * copied to new child page, i.e., original entry now
2172 		 * resides on the new child page;
2173 		 */
2174 		if (p->header.flag & BT_INTERNAL) {
2175 			ASSERT(p->header.nextindex ==
2176 			       cpu_to_le16(XTENTRYSTART + 1));
2177 			xad = &p->xad[XTENTRYSTART];
2178 			bn = addressXAD(xad);
2179 			XT_PUTPAGE(mp);
2180 
2181 			/* get new child page */
2182 			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2183 			if (rc)
2184 				return rc;
2185 
2186 			BT_MARK_DIRTY(mp, ip);
2187 			if (!test_cflag(COMMIT_Nolink, ip)) {
2188 				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2189 				xtlck = (struct xtlock *) & tlck->lock;
2190 			}
2191 		} else {
2192 			/* is nXAD on new page ? */
2193 			if (newindex >
2194 			    (le16_to_cpu(p->header.maxentry) >> 1)) {
2195 				newindex =
2196 				    newindex -
2197 				    le16_to_cpu(p->header.nextindex) +
2198 				    XTENTRYSTART;
2199 				newpage = 1;
2200 			}
2201 		}
2202 	} else {
2203 		/* if insert into middle, shift right remaining entries */
2204 		if (newindex < nextindex)
2205 			memmove(&p->xad[newindex + 1], &p->xad[newindex],
2206 				(nextindex - newindex) << L2XTSLOTSIZE);
2207 
2208 		/* insert the entry */
2209 		xad = &p->xad[newindex];
2210 		*xad = *nxad;
2211 		xad->flag = xflag & ~XAD_NOTRECORDED;
2212 
2213 		/* advance next available entry index. */
2214 		p->header.nextindex =
2215 		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2216 	}
2217 
2218 	/*
2219 	 * does nXAD force 3-way split ?
2220 	 *
2221 	 *          |---nXAD--->|
2222 	 * --|----------XAD-------------|--
2223 	 *   |-lXAD-|           |-rXAD -|
2224 	 */
2225 	if (nxoff + nxlen == xoff + xlen)
2226 		goto out;
2227 
2228 	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
2229 	if (newpage) {
2230 		/* close out old page */
2231 		if (!test_cflag(COMMIT_Nolink, ip)) {
2232 			xtlck->lwm.offset = (xtlck->lwm.offset) ?
2233 			    min(index0, (int)xtlck->lwm.offset) : index0;
2234 			xtlck->lwm.length =
2235 			    le16_to_cpu(p->header.nextindex) -
2236 			    xtlck->lwm.offset;
2237 		}
2238 
2239 		bn = le64_to_cpu(p->header.next);
2240 		XT_PUTPAGE(mp);
2241 
2242 		/* get new right page */
2243 		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2244 		if (rc)
2245 			return rc;
2246 
2247 		BT_MARK_DIRTY(mp, ip);
2248 		if (!test_cflag(COMMIT_Nolink, ip)) {
2249 			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2250 			xtlck = (struct xtlock *) & tlck->lock;
2251 		}
2252 
2253 		index0 = index = newindex;
2254 	} else
2255 		index++;
2256 
2257 	newindex = index + 1;
2258 	nextindex = le16_to_cpu(p->header.nextindex);
2259 	xlen = xlen - (nxoff - xoff);
2260 	xoff = nxoff;
2261 	xaddr = nxaddr;
2262 
2263 	/* recompute split pages */
2264 	if (nextindex == le16_to_cpu(p->header.maxentry)) {
2265 		XT_PUTPAGE(mp);
2266 
2267 		if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
2268 			return rc;
2269 
2270 		/* retrieve search result */
2271 		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2272 
2273 		if (cmp != 0) {
2274 			XT_PUTPAGE(mp);
2275 			jfs_error(ip->i_sb, "xtUpdate: xtSearch failed");
2276 			return -EIO;
2277 		}
2278 
2279 		if (index0 != index) {
2280 			XT_PUTPAGE(mp);
2281 			jfs_error(ip->i_sb,
2282 				  "xtUpdate: unexpected value of index");
2283 			return -EIO;
2284 		}
2285 	}
2286 
2287 	/*
2288 	 * split XAD into (nXAD, rXAD)
2289 	 *
2290 	 *          ---nXAD---|
2291 	 * --|----------XAD----------|--
2292 	 *                    |-rXAD-|
2293 	 */
2294       updateLeft:		/* (nxoff == xoff) && (nxlen < xlen) */
2295 	/* update old XAD with nXAD:recorded */
2296 	xad = &p->xad[index];
2297 	*xad = *nxad;
2298 	xad->flag = xflag & ~XAD_NOTRECORDED;
2299 
2300 	/* insert rXAD:not_recorded */
2301 	xoff = xoff + nxlen;
2302 	xlen = xlen - nxlen;
2303 	xaddr = xaddr + nxlen;
2304 	if (nextindex == le16_to_cpu(p->header.maxentry)) {
2305 /*
2306 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2307 */
		/* xtSplitUp() unpins leaf pages */
2309 		split.mp = mp;
2310 		split.index = newindex;
2311 		split.flag = xflag;
2312 		split.off = xoff;
2313 		split.len = xlen;
2314 		split.addr = xaddr;
2315 		split.pxdlist = NULL;
2316 		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2317 			return rc;
2318 
2319 		/* get back old page */
2320 		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2321 		if (rc)
2322 			return rc;
2323 
2324 		/*
2325 		 * if leaf root has been split, original root has been
2326 		 * copied to new child page, i.e., original entry now
2327 		 * resides on the new child page;
2328 		 */
2329 		if (p->header.flag & BT_INTERNAL) {
2330 			ASSERT(p->header.nextindex ==
2331 			       cpu_to_le16(XTENTRYSTART + 1));
2332 			xad = &p->xad[XTENTRYSTART];
2333 			bn = addressXAD(xad);
2334 			XT_PUTPAGE(mp);
2335 
2336 			/* get new child page */
2337 			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2338 			if (rc)
2339 				return rc;
2340 
2341 			BT_MARK_DIRTY(mp, ip);
2342 			if (!test_cflag(COMMIT_Nolink, ip)) {
2343 				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2344 				xtlck = (struct xtlock *) & tlck->lock;
2345 			}
2346 		}
2347 	} else {
2348 		/* if insert into middle, shift right remaining entries */
2349 		if (newindex < nextindex)
2350 			memmove(&p->xad[newindex + 1], &p->xad[newindex],
2351 				(nextindex - newindex) << L2XTSLOTSIZE);
2352 
2353 		/* insert the entry */
2354 		xad = &p->xad[newindex];
2355 		XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2356 
2357 		/* advance next available entry index. */
2358 		p->header.nextindex =
2359 		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2360 	}
2361 
2362       out:
2363 	if (!test_cflag(COMMIT_Nolink, ip)) {
2364 		xtlck->lwm.offset = (xtlck->lwm.offset) ?
2365 		    min(index0, (int)xtlck->lwm.offset) : index0;
2366 		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2367 		    xtlck->lwm.offset;
2368 	}
2369 
2370 	/* unpin the leaf page */
2371 	XT_PUTPAGE(mp);
2372 
2373 	return rc;
2374 }
2375 
2376 
2377 /*
2378  *      xtAppend()
2379  *
2380  * function: grow in append mode from contiguous region specified ;
2381  *
2382  * parameter:
2383  *      tid             - transaction id;
2384  *      ip              - file object;
2385  *      xflag           - extent flag:
2386  *      xoff            - extent offset;
2387  *      maxblocks       - max extent length;
2388  *      xlen            - extent length (in/out);
2389  *      xaddrp          - extent address pointer (in/out):
2390  *      flag            -
2391  *
2392  * return:
2393  */
int xtAppend(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
	     s32 * xlenp,	/* (in/out) */
	     s64 * xaddrp,	/* (in/out) */
	     int flag)		/* currently unused */
{
	int rc = 0;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn, xaddr;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int nsplit, nblocks, xlen;
	struct pxdlist pxdlist;
	pxd_t *pxd;

	xaddr = *xaddrp;
	xlen = *xlenp;
	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);

	/*
	 *      search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* an exact key match means an extent already covers xoff */
	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}
//insert:
	/*
	 *      insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *      if the leaf page is full, split the page and
	 *      propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s):
	 * carve each split page's extent off the front of the
	 * contiguous region [xaddr, xaddr + maxblocks).
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);

			pxdlist.maxnpxd++;

			continue;
		}

		/* undo allocation: free the split-page extents recorded
		 * so far so that a failed append does not leak blocks
		 * in the working allocation map.
		 */
		for (pxd = &pxdlist.pxd[0]; pxdlist.maxnpxd > 0;
		     pxdlist.maxnpxd--, pxd++)
			dbFree(ip, addressPXD(pxd), lengthPXD(pxd));

		goto out;
	}

	/* remaining blocks are what is left for the data extent itself */
	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	/* report the extent actually allocated back to the caller */
	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 *      insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);

	/* extend the linelock to cover the inserted entry */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
2543 #ifdef _STILL_TO_PORT
2544 
/* - TBD for defragmentation/reorganization -
2546  *
2547  *      xtDelete()
2548  *
2549  * function:
2550  *      delete the entry with the specified key.
2551  *
2552  *      N.B.: whole extent of the entry is assumed to be deleted.
2553  *
2554  * parameter:
2555  *
2556  * return:
2557  *       ENOENT: if the entry is not found.
2558  *
2559  * exception:
2560  */
int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
{
	/* delete the leaf entry whose extent starts at xoff;
	 * n.b. xlen and flag are currently unused by this body.
	 */
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * find the matching entry; xtSearch() pins the page
	 */
	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
		return rc;

	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
	if (cmp) {
		/* no exact match: nothing to delete */
		/* unpin the leaf page */
		XT_PUTPAGE(mp);
		return -ENOENT;
	}

	/*
	 * delete the entry from the leaf page
	 * (decrement the count first; the entry itself is compacted
	 * away below only when it is not the last one)
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);

	/*
	 * if the leaf page become empty, free the page
	 * (xtDeleteUp() consumes mp/p and propagates the deletion)
	 */
	if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
		return (xtDeleteUp(tid, ip, mp, p, &btstack));

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action:xad deletion;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE);
	xtlck = (struct xtlock *) & tlck->lock;
	/* extend the linelock's low-water mark down to the deleted slot */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;

	/* if delete from middle, shift left/compact the remaining entries */
	if (index < nextindex - 1)
		memmove(&p->xad[index], &p->xad[index + 1],
			(nextindex - index - 1) * sizeof(xad_t));

	XT_PUTPAGE(mp);

	return 0;
}
2619 
2620 
/* - TBD for defragmentation/reorganization -
2622  *
2623  *      xtDeleteUp()
2624  *
2625  * function:
2626  *      free empty pages as propagating deletion up the tree
2627  *
2628  * parameter:
2629  *
2630  * return:
2631  */
static int
xtDeleteUp(tid_t tid, struct inode *ip,
	   struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
{
	/* free the empty page (fmp/fp) handed in by xtDelete() and
	 * propagate the entry deletion up through the parents saved
	 * on btstack, stopping at the root or at a non-empty parent.
	 */
	int rc = 0;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	s64 xaddr;
	int xlen;
	struct btframe *parent;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * keep root leaf page which has become empty
	 * (the root lives inline in the inode and is never freed;
	 * it is simply reset to an empty leaf)
	 */
	if (fp->header.flag & BT_ROOT) {
		/* keep the root page */
		fp->header.flag &= ~BT_INTERNAL;
		fp->header.flag |= BT_LEAF;
		fp->header.nextindex = cpu_to_le16(XTENTRYSTART);

		/* XT_PUTPAGE(fmp); */

		return 0;
	}

	/*
	 * free non-root leaf page:
	 * first unlink it from its sibling chain
	 */
	if ((rc = xtRelink(tid, ip, fp))) {
		XT_PUTPAGE(fmp);
		return rc;
	}

	xaddr = addressPXD(&fp->header.self);
	xlen = lengthPXD(&fp->header.self);
	/* free the page extent */
	dbFree(ip, xaddr, (s64) xlen);

	/* free the buffer page */
	discard_metapage(fmp);

	/*
	 * propagate page deletion up the index tree
	 *
	 * If the delete from the parent page makes it empty,
	 * continue all the way up the tree.
	 * stop if the root page is reached (which is never deleted) or
	 * if the entry deletion does not empty the page.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = parent->index;

		/* delete the entry for the freed child page from parent.
		 */
		nextindex = le16_to_cpu(p->header.nextindex);

		/*
		 * the parent has the single entry being deleted:
		 * free the parent page which has become empty.
		 */
		if (nextindex == 1) {
			if (p->header.flag & BT_ROOT) {
				/* keep the root page */
				p->header.flag &= ~BT_INTERNAL;
				p->header.flag |= BT_LEAF;
				p->header.nextindex =
				    cpu_to_le16(XTENTRYSTART);

				/* XT_PUTPAGE(mp); */

				break;
			} else {
				/* free the parent page */
				if ((rc = xtRelink(tid, ip, p)))
					return rc;

				xaddr = addressPXD(&p->header.self);
				/* free the page extent */
				dbFree(ip, xaddr,
				       (s64) JFS_SBI(ip->i_sb)->nbperpage);

				/* unpin/free the buffer page */
				discard_metapage(mp);

				/* propagate up */
				continue;
			}
		}
		/*
		 * the parent has other entries remaining:
		 * delete the router entry from the parent page.
		 */
		else {
			BT_MARK_DIRTY(mp, ip);
			/*
			 * acquire a transaction lock on the leaf page;
			 *
			 * action:xad deletion;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset =
			    (xtlck->lwm.offset) ? min(index,
						      xtlck->lwm.
						      offset) : index;

			/* if delete from middle,
			 * shift left/compact the remaining entries in the page
			 */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);
			jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
				 (ulong) parent->bn, index);
		}

		/* unpin the parent page */
		XT_PUTPAGE(mp);

		/* exit propagation up */
		break;
	}

	return 0;
}
2770 
2771 
2772 /*
2773  * NAME:        xtRelocate()
2774  *
2775  * FUNCTION:    relocate xtpage or data extent of regular file;
2776  *              This function is mainly used by defragfs utility.
2777  *
2778  * NOTE:        This routine does not have the logic to handle
2779  *              uncommitted allocated extent. The caller should call
2780  *              txCommit() to commit all the allocation before call
2781  *              this routine.
2782  */
2783 int
xtRelocate(tid_t tid,struct inode * ip,xad_t * oxad,s64 nxaddr,int xtype)2784 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
2785 	   s64 nxaddr,		/* new xaddr */
2786 	   int xtype)
2787 {				/* extent type: XTPAGE or DATAEXT */
2788 	int rc = 0;
2789 	struct tblock *tblk;
2790 	struct tlock *tlck;
2791 	struct xtlock *xtlck;
2792 	struct metapage *mp, *pmp, *lmp, *rmp;	/* meta-page buffer */
2793 	xtpage_t *p, *pp, *rp, *lp;	/* base B+-tree index page */
2794 	xad_t *xad;
2795 	pxd_t *pxd;
2796 	s64 xoff, xsize;
2797 	int xlen;
2798 	s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2799 	cbuf_t *cp;
2800 	s64 offset, nbytes, nbrd, pno;
2801 	int nb, npages, nblks;
2802 	s64 bn;
2803 	int cmp;
2804 	int index;
2805 	struct pxd_lock *pxdlock;
2806 	struct btstack btstack;	/* traverse stack */
2807 
2808 	xtype = xtype & EXTENT_TYPE;
2809 
2810 	xoff = offsetXAD(oxad);
2811 	oxaddr = addressXAD(oxad);
2812 	xlen = lengthXAD(oxad);
2813 
2814 	/* validate extent offset */
2815 	offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2816 	if (offset >= ip->i_size)
2817 		return -ESTALE;	/* stale extent */
2818 
2819 	jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2820 		 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2821 
2822 	/*
2823 	 *      1. get and validate the parent xtpage/xad entry
2824 	 *      covering the source extent to be relocated;
2825 	 */
2826 	if (xtype == DATAEXT) {
2827 		/* search in leaf entry */
2828 		rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
2829 		if (rc)
2830 			return rc;
2831 
2832 		/* retrieve search result */
2833 		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2834 
2835 		if (cmp) {
2836 			XT_PUTPAGE(pmp);
2837 			return -ESTALE;
2838 		}
2839 
2840 		/* validate for exact match with a single entry */
2841 		xad = &pp->xad[index];
2842 		if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2843 			XT_PUTPAGE(pmp);
2844 			return -ESTALE;
2845 		}
2846 	} else {		/* (xtype == XTPAGE) */
2847 
2848 		/* search in internal entry */
2849 		rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2850 		if (rc)
2851 			return rc;
2852 
2853 		/* retrieve search result */
2854 		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2855 
2856 		if (cmp) {
2857 			XT_PUTPAGE(pmp);
2858 			return -ESTALE;
2859 		}
2860 
2861 		/* xtSearchNode() validated for exact match with a single entry
2862 		 */
2863 		xad = &pp->xad[index];
2864 	}
2865 	jfs_info("xtRelocate: parent xad entry validated.");
2866 
2867 	/*
2868 	 *      2. relocate the extent
2869 	 */
2870 	if (xtype == DATAEXT) {
2871 		/* if the extent is allocated-but-not-recorded
2872 		 * there is no real data to be moved in this extent,
2873 		 */
2874 		if (xad->flag & XAD_NOTRECORDED)
2875 			goto out;
2876 		else
2877 			/* release xtpage for cmRead()/xtLookup() */
2878 			XT_PUTPAGE(pmp);
2879 
2880 		/*
2881 		 *      cmRelocate()
2882 		 *
2883 		 * copy target data pages to be relocated;
2884 		 *
2885 		 * data extent must start at page boundary and
2886 		 * multiple of page size (except the last data extent);
2887 		 * read in each page of the source data extent into cbuf,
2888 		 * update the cbuf extent descriptor of the page to be
2889 		 * homeward bound to new dst data extent
2890 		 * copy the data from the old extent to new extent.
2891 		 * copy is essential for compressed files to avoid problems
2892 		 * that can arise if there was a change in compression
2893 		 * algorithms.
2894 		 * it is a good strategy because it may disrupt cache
2895 		 * policy to keep the pages in memory afterwards.
2896 		 */
2897 		offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2898 		assert((offset & CM_OFFSET) == 0);
2899 		nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2900 		pno = offset >> CM_L2BSIZE;
2901 		npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2902 /*
2903                 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2904                          (offset >> CM_L2BSIZE) + 1;
2905 */
2906 		sxaddr = oxaddr;
2907 		dxaddr = nxaddr;
2908 
2909 		/* process the request one cache buffer at a time */
2910 		for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2911 		     offset += nb, pno++, npages--) {
2912 			/* compute page size */
2913 			nb = min(nbytes - nbrd, CM_BSIZE);
2914 
2915 			/* get the cache buffer of the page */
2916 			if (rc = cmRead(ip, offset, npages, &cp))
2917 				break;
2918 
2919 			assert(addressPXD(&cp->cm_pxd) == sxaddr);
2920 			assert(!cp->cm_modified);
2921 
2922 			/* bind buffer with the new extent address */
2923 			nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
2924 			cmSetXD(ip, cp, pno, dxaddr, nblks);
2925 
2926 			/* release the cbuf, mark it as modified */
2927 			cmPut(cp, TRUE);
2928 
2929 			dxaddr += nblks;
2930 			sxaddr += nblks;
2931 		}
2932 
2933 		/* get back parent page */
2934 		if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
2935 			return rc;
2936 
2937 		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2938 		jfs_info("xtRelocate: target data extent relocated.");
2939 	} else {		/* (xtype  == XTPAGE) */
2940 
2941 		/*
2942 		 * read in the target xtpage from the source extent;
2943 		 */
2944 		XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2945 		if (rc) {
2946 			XT_PUTPAGE(pmp);
2947 			return rc;
2948 		}
2949 
2950 		/*
2951 		 * read in sibling pages if any to update sibling pointers;
2952 		 */
2953 		rmp = NULL;
2954 		if (p->header.next) {
2955 			nextbn = le64_to_cpu(p->header.next);
2956 			XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2957 			if (rc) {
2958 				XT_PUTPAGE(pmp);
2959 				XT_PUTPAGE(mp);
2960 				return (rc);
2961 			}
2962 		}
2963 
2964 		lmp = NULL;
2965 		if (p->header.prev) {
2966 			prevbn = le64_to_cpu(p->header.prev);
2967 			XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2968 			if (rc) {
2969 				XT_PUTPAGE(pmp);
2970 				XT_PUTPAGE(mp);
2971 				if (rmp)
2972 					XT_PUTPAGE(rmp);
2973 				return (rc);
2974 			}
2975 		}
2976 
2977 		/* at this point, all xtpages to be updated are in memory */
2978 
2979 		/*
2980 		 * update sibling pointers of sibling xtpages if any;
2981 		 */
2982 		if (lmp) {
2983 			BT_MARK_DIRTY(lmp, ip);
2984 			tlck =
2985 			    txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
2986 			lp->header.next = cpu_to_le64(nxaddr);
2987 			XT_PUTPAGE(lmp);
2988 		}
2989 
2990 		if (rmp) {
2991 			BT_MARK_DIRTY(rmp, ip);
2992 			tlck =
2993 			    txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
2994 			rp->header.prev = cpu_to_le64(nxaddr);
2995 			XT_PUTPAGE(rmp);
2996 		}
2997 
2998 		/*
2999 		 * update the target xtpage to be relocated
3000 		 *
3001 		 * update the self address of the target page
3002 		 * and write to destination extent;
3003 		 * redo image covers the whole xtpage since it is new page
3004 		 * to the destination extent;
3005 		 * update of bmap for the free of source extent
3006 		 * of the target xtpage itself:
3007 		 * update of bmap for the allocation of destination extent
3008 		 * of the target xtpage itself:
3009 		 * update of bmap for the extents covered by xad entries in
3010 		 * the target xtpage is not necessary since they are not
3011 		 * updated;
3012 		 * if not committed before this relocation,
3013 		 * target page may contain XAD_NEW entries which must
3014 		 * be scanned for bmap update (logredo() always
3015 		 * scan xtpage REDOPAGE image for bmap update);
3016 		 * if committed before this relocation (tlckRELOCATE),
3017 		 * scan may be skipped by commit() and logredo();
3018 		 */
3019 		BT_MARK_DIRTY(mp, ip);
3020 		/* tlckNEW init  xtlck->lwm.offset = XTENTRYSTART; */
3021 		tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
3022 		xtlck = (struct xtlock *) & tlck->lock;
3023 
3024 		/* update the self address in the xtpage header */
3025 		pxd = &p->header.self;
3026 		PXDaddress(pxd, nxaddr);
3027 
3028 		/* linelock for the after image of the whole page */
3029 		xtlck->lwm.length =
3030 		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
3031 
3032 		/* update the buffer extent descriptor of target xtpage */
3033 		xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
3034 		bmSetXD(mp, nxaddr, xsize);
3035 
3036 		/* unpin the target page to new homeward bound */
3037 		XT_PUTPAGE(mp);
3038 		jfs_info("xtRelocate: target xtpage relocated.");
3039 	}
3040 
3041 	/*
3042 	 *      3. acquire maplock for the source extent to be freed;
3043 	 *
3044 	 * acquire a maplock saving the src relocated extent address;
3045 	 * to free of the extent at commit time;
3046 	 */
3047       out:
3048 	/* if DATAEXT relocation, write a LOG_UPDATEMAP record for
3049 	 * free PXD of the source data extent (logredo() will update
3050 	 * bmap for free of source data extent), and update bmap for
3051 	 * free of the source data extent;
3052 	 */
3053 	if (xtype == DATAEXT)
3054 		tlck = txMaplock(tid, ip, tlckMAP);
3055 	/* if XTPAGE relocation, write a LOG_NOREDOPAGE record
3056 	 * for the source xtpage (logredo() will init NoRedoPage
3057 	 * filter and will also update bmap for free of the source
3058 	 * xtpage), and update bmap for free of the source xtpage;
3059 	 * N.B. We use tlckMAP instead of tlkcXTREE because there
3060 	 *      is no buffer associated with this lock since the buffer
3061 	 *      has been redirected to the target location.
3062 	 */
3063 	else			/* (xtype  == XTPAGE) */
3064 		tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
3065 
3066 	pxdlock = (struct pxd_lock *) & tlck->lock;
3067 	pxdlock->flag = mlckFREEPXD;
3068 	PXDaddress(&pxdlock->pxd, oxaddr);
3069 	PXDlength(&pxdlock->pxd, xlen);
3070 	pxdlock->index = 1;
3071 
3072 	/*
3073 	 *      4. update the parent xad entry for relocation;
3074 	 *
3075 	 * acquire tlck for the parent entry with XAD_NEW as entry
3076 	 * update which will write LOG_REDOPAGE and update bmap for
3077 	 * allocation of XAD_NEW destination extent;
3078 	 */
3079 	jfs_info("xtRelocate: update parent xad entry.");
3080 	BT_MARK_DIRTY(pmp, ip);
3081 	tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
3082 	xtlck = (struct xtlock *) & tlck->lock;
3083 
3084 	/* update the XAD with the new destination extent; */
3085 	xad = &pp->xad[index];
3086 	xad->flag |= XAD_NEW;
3087 	XADaddress(xad, nxaddr);
3088 
3089 	xtlck->lwm.offset = min(index, xtlck->lwm.offset);
3090 	xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
3091 	    xtlck->lwm.offset;
3092 
3093 	/* unpin the parent xtpage */
3094 	XT_PUTPAGE(pmp);
3095 
3096 	return rc;
3097 }
3098 
3099 
3100 /*
3101  *      xtSearchNode()
3102  *
3103  * function:    search for the internal xad entry covering specified extent.
3104  *              This function is mainly used by defragfs utility.
3105  *
3106  * parameters:
3107  *      ip      - file object;
3108  *      xad     - extent to find;
3109  *      cmpp    - comparison result:
3110  *      btstack - traverse stack;
3111  *      flag    - search process flag;
3112  *
3113  * returns:
3114  *      btstack contains (bn, index) of search path traversed to the entry.
3115  *      *cmpp is set to result of comparison with the entry returned.
3116  *      the page containing the entry is pinned at exit.
3117  */
static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
			int *cmpp, struct btstack * btstack, int flag)
{
	/* descend the internal levels of the tree looking for the
	 * router entry that exactly matches xad (offset and address);
	 * n.b. flag is currently unused by this body.
	 */
	int rc = 0;
	s64 xoff, xaddr;
	int xlen;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* page */
	int base, index, lim;
	struct btframe *btsp;
	s64 t64;		/* scratch for XT_CMP's offset */

	BT_CLR(btstack);

	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/*
	 *      search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/* internal entries live only in internal pages: reaching
		 * a leaf means the requested node entry does not exist */
		if (p->header.flag & BT_LEAF) {
			XT_PUTPAGE(mp);
			return -ESTALE;
		}

		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			/* XT_CMP: -1/0/1 for K before/within/after the
			 * extent at p->xad[index] */
			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *      search hit
				 *
				 * verify for exact match;
				 */
				if (xaddr == addressXAD(&p->xad[index]) &&
				    xoff == offsetXAD(&p->xad[index])) {
					*cmpp = cmp;

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					return 0;
				}

				/* covering entry but not the one sought:
				 * descend/search its child page */
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *      search miss - non-leaf page:
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
3223 
3224 
3225 /*
3226  *      xtRelink()
3227  *
3228  * function:
3229  *      link around a freed page.
3230  *
3231  * Parameter:
3232  *      int           tid,
3233  *      struct inode    *ip,
3234  *      xtpage_t        *p)
3235  *
3236  * returns:
3237  */
static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
{
	/* unlink page p from its sibling chain by pointing its
	 * neighbours at each other; n.b. XT_GETPAGE below reuses p
	 * as the sibling page pointer, so the siblings of the freed
	 * page are captured up front.
	 */
	int rc = 0;
	struct metapage *mp;
	s64 nextbn, prevbn;
	struct tlock *tlck;

	nextbn = le64_to_cpu(p->header.next);
	prevbn = le64_to_cpu(p->header.prev);

	/* update prev pointer of the next page */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/*
		 * acquire a transaction lock on the page;
		 *
		 * action: update prev pointer;
		 */
		BT_MARK_DIRTY(mp, ip);
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		/* the page may already have been tlock'd */

		p->header.prev = cpu_to_le64(prevbn);

		XT_PUTPAGE(mp);
	}

	/* update next pointer of the previous page */
	if (prevbn != 0) {
		XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/*
		 * acquire a transaction lock on the page;
		 *
		 * action: update next pointer;
		 */
		BT_MARK_DIRTY(mp, ip);
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		/* the page may already have been tlock'd */

		/* bug fix: the on-disk field must be stored in
		 * little-endian form — was le64_to_cpu(nextbn)
		 * (cf. the cpu_to_le64(prevbn) store above)
		 */
		p->header.next = cpu_to_le64(nextbn);

		XT_PUTPAGE(mp);
	}

	return 0;
}
3292 #endif				/*  _STILL_TO_PORT */
3293 
3294 
3295 /*
3296  *      xtInitRoot()
3297  *
3298  * initialize file root (inline in inode)
3299  */
void xtInitRoot(tid_t tid, struct inode *ip)
{
	xtpage_t *rp;

	/* transaction-lock the in-inode root before initializing it */
	txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
	       tlckXTREE | tlckNEW);

	/* the root page lives inline in the inode */
	rp = &JFS_IP(ip)->i_xtroot;

	/* a freshly initialized tree is a single empty leaf at the root */
	rp->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	/* regular files start at zero length; directories get the
	 * larger root slot count and keep their existing size
	 */
	if (!S_ISDIR(ip->i_mode)) {
		rp->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
		ip->i_size = 0;
	} else
		rp->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
}
3326 
3327 
3328 /*
3329  * We can run into a deadlock truncating a file with a large number of
3330  * xtree pages (large fragmented file).  A robust fix would entail a
3331  * reservation system where we would reserve a number of metadata pages
3332  * and tlocks which we would be guaranteed without a deadlock.  Without
3333  * this, a partial fix is to limit number of metadata pages we will lock
3334  * in a single transaction.  Currently we will truncate the file so that
3335  * no more than 50 leaf pages will be locked.  The caller of xtTruncate
3336  * will be responsible for ensuring that the current transaction gets
3337  * committed, and that subsequent transactions are created to truncate
3338  * the file further if needed.
3339  */
3340 #define MAX_TRUNCATE_LEAVES 50
3341 
3342 /*
3343  *      xtTruncate()
3344  *
3345  * function:
3346  *      traverse for truncation logging backward bottom up;
3347  *      terminate at the last extent entry at the current subtree
3348  *      root page covering new down size.
3349  *      truncation may occur within the last extent entry.
3350  *
3351  * parameter:
3352  *      int           tid,
3353  *      struct inode    *ip,
3354  *      s64           newsize,
3355  *      int           type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3356  *
3357  * return:
3358  *
3359  * note:
3360  *      PWMAP:
3361  *       1. truncate (non-COMMIT_NOLINK file)
3362  *          by jfs_truncate() or jfs_open(O_TRUNC):
3363  *          xtree is updated;
3364  *	 2. truncate index table of directory when last entry removed
3365  *       map update via tlock at commit time;
3366  *      PMAP:
3367  *	 Call xtTruncate_pmap instead
3368  *      WMAP:
3369  *       1. remove (free zero link count) on last reference release
3370  *          (pmap has been freed at commit zero link count);
3371  *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3372  *          xtree is updated;
3373  *       map update directly at truncation time;
3374  *
3375  *      if (DELETE)
3376  *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3377  *      else if (TRUNCATE)
3378  *              must write LOG_NOREDOPAGE for deleted index page;
3379  *
3380  * pages may already have been tlocked by anonymous transactions
3381  * during file growth (i.e., write) before truncation;
3382  *
 *      except for the last truncated entry, deleted entries remain as is
 *      in the page (nextindex is updated) for other use
 *      (e.g., log/update allocation map): this avoids copying the page
 *      info but delays freeing of the pages;
3387  *
3388  */
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
	int rc = 0;
	s64 teof;		/* truncation point in filesystem blocks */
	struct metapage *mp;
	xtpage_t *p;
	s64 bn;
	int index, nextindex;
	xad_t *xad;
	s64 xoff, xaddr;
	int xlen, len, freexlen;
	struct btstack btstack;	/* path from root down to current page */
	struct btframe *parent;
	struct tblock *tblk = 0;	/* stays NULL when tid == 0 */
	struct tlock *tlck = 0;
	struct xtlock *xtlck = 0;
	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
	struct pxd_lock *pxdlock;		/* maplock for COMMIT_WMAP */
	s64 nfreed;		/* total blocks freed, for i_blocks update */
	int freed, log;
	int locked_leaves = 0;	/* leaves tlock'd so far (PWMAP limit) */

	/* save object truncation type */
	if (tid) {
		tblk = tid_to_tblock(tid);
		tblk->xflag |= flag;
	}

	nfreed = 0;

	flag &= COMMIT_MAP;
	assert(flag != COMMIT_PMAP);

	/* PWMAP truncation is logged via tlocks at commit time;
	 * WMAP frees blocks from the working map immediately
	 */
	if (flag == COMMIT_PWMAP)
		log = 1;
	else {
		log = 0;
		xadlock.flag = mlckFREEXADLIST;
		xadlock.index = 1;
	}

	/*
	 * if the newsize is not an integral number of pages,
	 * the file between newsize and next page boundary will
	 * be cleared.
	 * if truncating into a file hole, it will cause
	 * a full block to be allocated for the logical block.
	 */

	/*
	 * release page blocks of truncated region <teof, eof>
	 *
	 * free the data blocks from the leaf index blocks.
	 * delete the parent index entries corresponding to
	 * the freed child data/index blocks.
	 * free the index blocks themselves which aren't needed
	 * in new sized file.
	 *
	 * index blocks are updated only if the blocks are to be
	 * retained in the new sized file.
	 * if type is PMAP, the data and index pages are NOT
	 * freed, and the data and index blocks are NOT freed
	 * from  working map.
	 * (this will allow continued access of data/index of
	 * temporary file (zerolink count file truncated to zero-length)).
	 */
	/* teof = newsize rounded up to a block boundary, in blocks */
	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;

	/*
	 * first access of each page:
	 */
      getPage:
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;

	if (p->header.flag & BT_INTERNAL)
		goto getChild;

	/*
	 *      leaf page
	 */

	/* Since this is the rightmost leaf, and we may have already freed
	 * a page that was formerly to the right, let's make sure that the
	 * next pointer is zero.
	 */
	if (p->header.next) {
		if (log)
			/*
			 * Make sure this change to the header is logged.
			 * If we really truncate this leaf, the flag
			 * will be changed to tlckTRUNCATE
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
		BT_MARK_DIRTY(mp, ip);
		p->header.next = 0;
	}

	freed = 0;

	/* does region covered by leaf page precede Teof ? */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	if (teof >= xoff + xlen) {
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/* (re)acquire tlock of the leaf page */
	if (log) {
		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
			/*
			 * We need to limit the size of the transaction
			 * to avoid exhausting pagecache & tlocks
			 */
			XT_PUTPAGE(mp);
			/* return a size larger than requested so the
			 * caller knows to commit and truncate again
			 */
			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
			goto getParent;
		}
		tlck = txLock(tid, ip, mp, tlckXTREE);
		tlck->type = tlckXTREE | tlckTRUNCATE;
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
	}
	BT_MARK_DIRTY(mp, ip);

	/*
	 * scan backward leaf page entries
	 */
	for (; index >= XTENTRYSTART; index--) {
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		xaddr = addressXAD(xad);

		/*
		 * The "data" for a directory is indexed by the block
		 * device's address space.  This metadata must be invalidated
		 * here
		 */
		if (S_ISDIR(ip->i_mode) && (teof == 0))
			invalidate_xad_metapages(ip, *xad);
		/*
		 * entry beyond eof: continue scan of current page
		 *          xad
		 * ---|---=======------->
		 *   eof
		 */
		if (teof < xoff) {
			nfreed += xlen;
			continue;
		}

		/*
		 * (xoff <= teof): last entry to be deleted from page;
		 * If other entries remain in page: keep and update the page.
		 */

		/*
		 * eof == entry_start: delete the entry
		 *           xad
		 * -------|=======------->
		 *       eof
		 *
		 */
		if (teof == xoff) {
			nfreed += xlen;

			/* no entries would remain: free whole leaf below */
			if (index == XTENTRYSTART)
				break;

			nextindex = index;
		}
		/*
		 * eof within the entry: truncate the entry.
		 *          xad
		 * -------===|===------->
		 *          eof
		 */
		else if (teof < xoff + xlen) {
			/* update truncated entry */
			len = teof - xoff;
			freexlen = xlen - len;
			XADlength(xad, len);

			/* save pxd of truncated extent in tlck */
			xaddr += len;
			if (log) {	/* COMMIT_PWMAP */
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(index, (int)xtlck->lwm.offset) : index;
				xtlck->lwm.length = index + 1 -
				    xtlck->lwm.offset;
				xtlck->twm.offset = index;
				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
			}
			/* free truncated extent */
			else {	/* COMMIT_WMAP */

				pxdlock = (struct pxd_lock *) & xadlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
				txFreeMap(ip, pxdlock, 0, COMMIT_WMAP);

				/* reset map lock */
				xadlock.flag = mlckFREEXADLIST;
			}

			/* current entry is new last entry; */
			nextindex = index + 1;

			nfreed += freexlen;
		}
		/*
		 * eof beyond the entry:
		 *          xad
		 * -------=======---|--->
		 *                 eof
		 */
		else {		/* (xoff + xlen < teof) */

			nextindex = index + 1;
		}

		/* trailing entries were deleted: trim nextindex and,
		 * for WMAP, free their blocks from the working map now
		 */
		if (nextindex < le16_to_cpu(p->header.nextindex)) {
			if (!log) {	/* COMMIT_WAMP */
				xadlock.xdlist = &p->xad[nextindex];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    nextindex;
				txFreeMap(ip, (struct maplock *) & xadlock, 0,
					  COMMIT_WMAP);
			}
			p->header.nextindex = cpu_to_le16(nextindex);
		}

		XT_PUTPAGE(mp);

		/* assert(freed == 0); */
		goto getParent;
	}			/* end scan of leaf page entries */

	freed = 1;

	/*
	 * leaf page become empty: free the page if type != PMAP
	 */
	if (log) {		/* COMMIT_PWMAP */
		/* txCommit() with tlckFREE:
		 * free data extents covered by leaf [XTENTRYSTART:hwm);
		 * invalidate leaf if COMMIT_PWMAP;
		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
		 */
		tlck->type = tlckXTREE | tlckFREE;
	} else {		/* COMMIT_WAMP */

		/* free data extents covered by leaf */
		xadlock.xdlist = &p->xad[XTENTRYSTART];
		xadlock.count =
		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
		txFreeMap(ip, (struct maplock *) & xadlock, 0, COMMIT_WMAP);
	}

	if (p->header.flag & BT_ROOT) {
		/* empty root: reset it to an empty leaf and finish */
		p->header.flag &= ~BT_INTERNAL;
		p->header.flag |= BT_LEAF;
		p->header.nextindex = cpu_to_le16(XTENTRYSTART);

		XT_PUTPAGE(mp);	/* debug */
		goto out;
	} else {
		if (log) {	/* COMMIT_PWMAP */
			/* page will be invalidated at tx completion
			 */
			XT_PUTPAGE(mp);
		} else {	/* COMMIT_WMAP */

			if (mp->lid)
				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;

			/* invalidate empty leaf page */
			discard_metapage(mp);
		}
	}

	/*
	 * the leaf page become empty: delete the parent entry
	 * for the leaf page if the parent page is to be kept
	 * in the new sized file.
	 */

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * child page was not empty:
	 */
	if (freed == 0) {
		/* has any entry deleted from parent ? */
		if (index < le16_to_cpu(p->header.nextindex) - 1) {
			/* (re)acquire tlock on the parent page */
			if (log) {	/* COMMIT_PWMAP */
				/* txCommit() with tlckTRUNCATE:
				 * free child extents covered by parent [);
				 */
				tlck = txLock(tid, ip, mp, tlckXTREE);
				xtlck = (struct xtlock *) & tlck->lock;
				if (!(tlck->type & tlckTRUNCATE)) {
					xtlck->hwm.offset =
					    le16_to_cpu(p->header.
							nextindex) - 1;
					tlck->type =
					    tlckXTREE | tlckTRUNCATE;
				}
			} else {	/* COMMIT_WMAP */

				/* free child extents covered by parent */
				xadlock.xdlist = &p->xad[index + 1];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    index - 1;
				txFreeMap(ip, (struct maplock *) & xadlock, 0,
					  COMMIT_WMAP);
			}
			BT_MARK_DIRTY(mp, ip);

			p->header.nextindex = cpu_to_le16(index + 1);
		}
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/*
	 * child page was empty:
	 */
	nfreed += lengthXAD(&p->xad[index]);

	/*
	 * During working map update, child page's tlock must be handled
	 * before parent's.  This is because the parent's tlock will cause
	 * the child's disk space to be marked available in the wmap, so
	 * it's important that the child page be released by that time.
	 *
	 * ToDo:  tlocks should be on doubly-linked list, so we can
	 * quickly remove it and add it to the end.
	 */

	/*
	 * Move parent page's tlock to the end of the tid's tlock list
	 */
	if (log && mp->lid && (tblk->last != mp->lid) &&
	    lid_to_tlock(mp->lid)->tid) {
		lid_t lid = mp->lid;
		struct tlock *prev;

		tlck = lid_to_tlock(lid);

		/* unlink the tlock from the singly-linked list ... */
		if (tblk->next == lid)
			tblk->next = tlck->next;
		else {
			for (prev = lid_to_tlock(tblk->next);
			     prev->next != lid;
			     prev = lid_to_tlock(prev->next)) {
				assert(prev->next);
			}
			prev->next = tlck->next;
		}
		/* ... and re-append it at the tail */
		lid_to_tlock(tblk->last)->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		if (log) {	/* COMMIT_PWMAP */
			/* txCommit() with tlckFREE:
			 * free child extents covered by parent;
			 * invalidate parent if COMMIT_PWMAP;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->hwm.offset =
			    le16_to_cpu(p->header.nextindex) - 1;
			tlck->type = tlckXTREE | tlckFREE;
		} else {	/* COMMIT_WMAP */

			/* free child extents covered by parent */
			xadlock.xdlist = &p->xad[XTENTRYSTART];
			xadlock.count =
			    le16_to_cpu(p->header.nextindex) -
			    XTENTRYSTART;
			txFreeMap(ip, (struct maplock *) & xadlock, 0,
				  COMMIT_WMAP);
		}
		BT_MARK_DIRTY(mp, ip);

		if (p->header.flag & BT_ROOT) {
			/* empty root: reset to an empty leaf */
			p->header.flag &= ~BT_INTERNAL;
			p->header.flag |= BT_LEAF;
			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
				/*
				 * Shrink root down to allow inline
				 * EA (otherwise fsck complains)
				 */
				p->header.maxentry =
				    cpu_to_le16(XTROOTINITSLOT);
				JFS_IP(ip)->mode2 |= INLINEEA;
			}

			XT_PUTPAGE(mp);	/* debug */
			goto out;
		} else {
			if (log) {	/* COMMIT_PWMAP */
				/* page will be invalidated at tx completion
				 */
				XT_PUTPAGE(mp);
			} else {	/* COMMIT_WMAP */

				if (mp->lid)
					lid_to_tlock(mp->lid)->flag |=
						tlckFREELOCK;

				/* invalidate parent page */
				discard_metapage(mp);
			}

			/* parent has become empty and freed:
			 * go back up to its parent page
			 */
			/* freed = 1; */
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else {
		/* try truncate region covered by preceding entry
		 * (process backward)
		 */
		index--;

		/* go back down to the child page corresponding
		 * to the entry
		 */
		goto getChild;
	}

	/*
	 *      internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:
	/*
	 * update file resource stat
	 */
	/* set size
	 */
	if (S_ISDIR(ip->i_mode) && !newsize)
		ip->i_size = 1;	/* fsck hates zero-length directories */
	else
		ip->i_size = newsize;

	/* update nblocks to reflect freed blocks */
	ip->i_blocks -= LBLK2PBLK(ip->i_sb, nfreed);

	/*
	 * free tlock of invalidated pages
	 */
	if (flag == COMMIT_WMAP)
		txFreelock(ip);

	/* may exceed the requested newsize if MAX_TRUNCATE_LEAVES hit */
	return newsize;
}
3915 
3916 
3917 /*
3918  *      xtTruncate_pmap()
3919  *
3920  * function:
 *	Perform truncate to zero length for a deleted file, leaving
 *	the xtree and working map untouched.  This allows the file to
3923  *	be accessed via open file handles, while the delete of the file
3924  *	is committed to disk.
3925  *
3926  * parameter:
3927  *      tid_t		tid,
3928  *      struct inode	*ip,
3929  *      s64		committed_size)
3930  *
3931  * return: new committed size
3932  *
3933  * note:
3934  *
3935  *	To avoid deadlock by holding too many transaction locks, the
3936  *	truncation may be broken up into multiple transactions.
 *	The committed_size keeps track of how much of the file has been
 *	freed from the pmaps.
3939  */
s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
{
	s64 bn;
	struct btstack btstack;	/* path from root down to current page */
	int cmp;
	int index;
	int locked_leaves = 0;	/* leaves tlock'd so far, bounded below */
	struct metapage *mp;
	xtpage_t *p;
	struct btframe *parent;
	int rc;
	struct tblock *tblk;
	struct tlock *tlck = 0;
	xad_t *xad;
	int xlen;
	s64 xoff;
	struct xtlock *xtlck = 0;

	/* save object truncation type */
	tblk = tid_to_tblock(tid);
	tblk->xflag |= COMMIT_PMAP;

	/* clear stack */
	BT_CLR(&btstack);

	if (committed_size) {
		/* resume where a previous pass left off: locate the
		 * extent containing the last committed block
		 */
		xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
		rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
		if (rc)
			return rc;

		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

		if (cmp != 0) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb,
				  "xtTruncate_pmap: did not find extent");
			return -EIO;
		}
	} else {
		/*
		 * start with root
		 *
		 * root resides in the inode
		 */
		bn = 0;

		/*
		 * first access of each page:
		 */
      getPage:
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* process entries backward from last index */
		index = le16_to_cpu(p->header.nextindex) - 1;

		if (p->header.flag & BT_INTERNAL)
			goto getChild;
	}

	/*
	 *      leaf page
	 */

	if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
		/*
		 * We need to limit the size of the transaction
		 * to avoid exhausting pagecache & tlocks
		 */
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		XT_PUTPAGE(mp);
		/* return partial progress; caller commits and calls again */
		return  (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
	}
	/* tlckFREE: pmap entries covered by leaf freed at commit */
	tlck = txLock(tid, ip, mp, tlckXTREE);
	tlck->type = tlckXTREE | tlckFREE;
	xtlck = (struct xtlock *) & tlck->lock;
	xtlck->hwm.offset = index;


	XT_PUTPAGE(mp);

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		/* txCommit() with tlckFREE:
		 * free child extents covered by parent;
		 * invalidate parent if COMMIT_PWMAP;
		 */
		tlck = txLock(tid, ip, mp, tlckXTREE);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset =
		    le16_to_cpu(p->header.nextindex) - 1;
		tlck->type = tlckXTREE | tlckFREE;

		XT_PUTPAGE(mp);

		/* NOTE(review): p->header.flag is read after XT_PUTPAGE
		 * released the page; safe for the in-inode root but looks
		 * suspect for a disk page — confirm against the putpage
		 * macro's semantics before relying on this ordering.
		 */
		if (p->header.flag & BT_ROOT) {

			goto out;
		} else {
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else
		index--;
	/*
	 *      internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:

	/* 0 => whole tree processed (no partial-progress size to report) */
	return 0;
}
4094 
4095 
4096 #ifdef _JFS_DEBUG_XTREE
4097 /*
4098  *      xtDisplayTree()
4099  *
4100  * function: traverse forward
4101  */
xtDisplayTree(struct inode * ip)4102 int xtDisplayTree(struct inode *ip)
4103 {
4104 	int rc = 0;
4105 	struct metapage *mp;
4106 	xtpage_t *p;
4107 	s64 bn, pbn;
4108 	int index, lastindex, v, h;
4109 	xad_t *xad;
4110 	struct btstack btstack;
4111 	struct btframe *btsp;
4112 	struct btframe *parent;
4113 
4114 	printk("display B+-tree.\n");
4115 
4116 	/* clear stack */
4117 	btsp = btstack.stack;
4118 
4119 	/*
4120 	 * start with root
4121 	 *
4122 	 * root resides in the inode
4123 	 */
4124 	bn = 0;
4125 	v = h = 0;
4126 
4127 	/*
4128 	 * first access of each page:
4129 	 */
4130       getPage:
4131 	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4132 	if (rc)
4133 		return rc;
4134 
4135 	/* process entries forward from first index */
4136 	index = XTENTRYSTART;
4137 	lastindex = le16_to_cpu(p->header.nextindex) - 1;
4138 
4139 	if (p->header.flag & BT_INTERNAL) {
4140 		/*
4141 		 * first access of each internal page
4142 		 */
4143 		goto getChild;
4144 	} else {		/* (p->header.flag & BT_LEAF) */
4145 
4146 		/*
4147 		 * first access of each leaf page
4148 		 */
4149 		printf("leaf page ");
4150 		xtDisplayPage(ip, bn, p);
4151 
4152 		/* unpin the leaf page */
4153 		XT_PUTPAGE(mp);
4154 	}
4155 
4156 	/*
4157 	 * go back up to the parent page
4158 	 */
4159       getParent:
4160 	/* pop/restore parent entry for the current child page */
4161 	if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
4162 		/* current page must have been root */
4163 		return;
4164 
4165 	/*
4166 	 * parent page scan completed
4167 	 */
4168 	if ((index = parent->index) == (lastindex = parent->lastindex)) {
4169 		/* go back up to the parent page */
4170 		goto getParent;
4171 	}
4172 
4173 	/*
4174 	 * parent page has entries remaining
4175 	 */
4176 	/* get back the parent page */
4177 	bn = parent->bn;
4178 	/* v = parent->level; */
4179 	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4180 	if (rc)
4181 		return rc;
4182 
4183 	/* get next parent entry */
4184 	index++;
4185 
4186 	/*
4187 	 * internal page: go down to child page of current entry
4188 	 */
4189       getChild:
4190 	/* push/save current parent entry for the child page */
4191 	btsp->bn = pbn = bn;
4192 	btsp->index = index;
4193 	btsp->lastindex = lastindex;
4194 	/* btsp->level = v; */
4195 	/* btsp->node = h; */
4196 	++btsp;
4197 
4198 	/* get child page */
4199 	xad = &p->xad[index];
4200 	bn = addressXAD(xad);
4201 
4202 	/*
4203 	 * first access of each internal entry:
4204 	 */
4205 	/* release parent page */
4206 	XT_PUTPAGE(mp);
4207 
4208 	printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index,
4209 	       (ulong) bn);
4210 	v++;
4211 	h = index;
4212 
4213 	/* process the child page */
4214 	goto getPage;
4215 }
4216 
4217 
4218 /*
4219  *      xtDisplayPage()
4220  *
4221  * function: display page
4222  */
/*
 * Debug-only dump of one xtree page: header fields followed by the
 * xad entries, four entries per output line.  Always returns 0.
 */
int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p)
{
	xad_t *xad;
	s64 xaddr, xoff;
	int xlen, i, j;

	/* display page control */
	/* fix: was printf(), undeclared in the kernel; use printk() */
	printk("bn:0x%lx flag:0x%x nextindex:%d\n",
	       (ulong) bn, p->header.flag,
	       le16_to_cpu(p->header.nextindex));

	/* display entries */
	xad = &p->xad[XTENTRYSTART];
	for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex);
	     i++, xad++, j++) {
		xoff = offsetXAD(xad);
		xaddr = addressXAD(xad);
		xlen = lengthXAD(xad);
		printk("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff,
		       (ulong) xaddr, xlen);

		/* break the line after every fourth entry */
		if (j == 4) {
			printk("\n");
			j = 0;
		}
	}

	printk("\n");

	/* fix: function is declared int but fell off the end with no
	 * return value (unused rc local also removed)
	 */
	return 0;
}
4254 
4255 
4256 #ifdef _JFS_WIP
4257 /*
4258  *      xtGather()
4259  *
4260  * function:
4261  *      traverse for allocation acquiring tlock at commit time
4262  *      (vs at the time of update) logging backward top down
4263  *
4264  * note:
4265  *      problem - establishing that all new allocation have been
4266  *      processed both for append and random write in sparse file
4267  *      at the current entry at the current subtree root page
4268  *
4269  */
int xtGather(btree_t *t)
{
	int rc = 0;
	xtpage_t *p;
	u64 bn;
	int index;
	btentry_t *e;
	struct btstack btstack;
	struct btsf *parent;

	/* NOTE(review): work-in-progress skeleton (_JFS_WIP) — `ip' and
	 * `mp' used below are not declared anywhere in this function, so
	 * this cannot compile as-is; the commented pseudo-code marks the
	 * intended logging logic still to be written.
	 */

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/* new root is NOT pointed by a new entry
	   if (p->header.flag & NEW)
	   allocate new page lock;
	   write a NEWPAGE log;
	 */

      dopage:
	/*
	 * first access of each page:
	 */
	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;

	if (p->header.flag & BT_LEAF) {
		/*
		 * first access of each leaf page
		 */
		/* process leaf page entries backward */
		for (; index >= XTENTRYSTART; index--) {
			e = &p->xad[index];
			/*
			 * if newpage, log NEWPAGE.
			 *
			 if (e->flag & XAD_NEW) {
			 nfound =+ entry->length;
			 update current page lock for the entry;
			 newpage(entry);
			 *
			 * if moved, log move.
			 *
			 } else if (e->flag & XAD_MOVED) {
			 reset flag;
			 update current page lock for the entry;
			 }
			 */
		}

		/* unpin the leaf page */
		XT_PUTPAGE(mp);

		/*
		 * go back up to the parent page
		 */
	      getParent:
		/* restore parent entry for the current child page */
		if ((parent = BT_POP(&btstack)) == NULL)
			/* current page must have been root */
			return 0;

		if ((index = parent->index) == XTENTRYSTART) {
			/*
			 * parent page scan completed
			 */
			/* go back up to the parent page */
			goto getParent;
		} else {
			/*
			 * parent page has entries remaining
			 */
			/* get back the parent page */
			bn = parent->bn;
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return -EIO;

			/* first subroot page which
			 * covers all new allocated blocks
			 * itself not new/modified.
			 * (if modified from split of descendent,
			 * go down path of split page)

			 if (nfound == nnew &&
			 !(p->header.flag & (NEW | MOD)))
			 exit scan;
			 */

			/* process parent page entries backward */
			index--;
		}
	} else {
		/*
		 * first access of each internal page
		 */
	}

	/*
	 * internal page: go down to child page of current entry
	 */

	/* save current parent entry for the child page */
	BT_PUSH(&btstack, bn, index);

	/* get current entry for the child page */
	e = &p->xad[index];

	/*
	 * first access of each internal entry:
	 */
	/*
	 * if new entry, log btree_tnewentry.
	 *
	 if (e->flag & XAD_NEW)
	 update parent page lock for the entry;
	 */

	/* release parent page */
	XT_PUTPAGE(mp);

	/* get child page */
	bn = e->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/*
	 * first access of each non-root page:
	 */
	/*
	 * if new, log btree_newpage.
	 *
	 if (p->header.flag & NEW)
	 allocate new page lock;
	 write a NEWPAGE log (next, prev);
	 */

	/* process the child page */
	goto dopage;

	/* NOTE(review): label is currently unreachable */
      out:
	return 0;
}
4424 #endif				/* _JFS_WIP */
4425 
4426 
4427 #ifdef CONFIG_JFS_STATISTICS
int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	/* render the counters into the caller's page buffer */
	int len = sprintf(buffer,
			  "JFS Xtree statistics\n"
			  "====================\n"
			  "searches = %d\n"
			  "fast searches = %d\n"
			  "splits = %d\n",
			  xtStat.search,
			  xtStat.fastSearch,
			  xtStat.split);

	/* window the rendered text by the requested offset/length */
	*start = buffer + offset;
	len -= offset;

	if (len <= length)
		*eof = 1;	/* everything fits: signal end of data */
	else
		len = length;	/* clamp to what the caller asked for */

	if (len < 0)
		len = 0;

	return len;
}
4458 #endif
4459