1 /*
2  *   Copyright (C) International Business Machines Corp., 2000-2004
3  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
4  *
5  *   This program is free software;  you can redistribute it and/or modify
6  *   it under the terms of the GNU General Public License as published by
7  *   the Free Software Foundation; either version 2 of the License, or
8  *   (at your option) any later version.
9  *
10  *   This program is distributed in the hope that it will be useful,
11  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13  *   the GNU General Public License for more details.
14  *
15  *   You should have received a copy of the GNU General Public License
16  *   along with this program;  if not, write to the Free Software
17  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 
20 /*
21  *      jfs_txnmgr.c: transaction manager
22  *
23  * notes:
24  * transaction starts with txBegin() and ends with txCommit()
25  * or txAbort().
26  *
 * tlock is acquired at the time of update
 * (obviates a scan at commit time for xtree and dtree);
 * tlock and mp point to each other
 * (no hashlist is needed for mp -> tlock).
31  *
32  * special cases:
33  * tlock on in-memory inode:
34  * in-place tlock in the in-memory inode itself;
35  * converted to page lock by iWrite() at commit time.
36  *
37  * tlock during write()/mmap() under anonymous transaction (tid = 0):
38  * transferred (?) to transaction at commit time.
39  *
40  * use the page itself to update allocation maps
41  * (obviate intermediate replication of allocation/deallocation data)
42  * hold on to mp+lock thru update of maps
43  */
44 
45 
46 #include <linux/fs.h>
47 #include <linux/vmalloc.h>
48 #include <linux/smp_lock.h>
49 #include <linux/completion.h>
50 #include "jfs_incore.h"
51 #include "jfs_filsys.h"
52 #include "jfs_metapage.h"
53 #include "jfs_dinode.h"
54 #include "jfs_imap.h"
55 #include "jfs_dmap.h"
56 #include "jfs_superblock.h"
57 #include "jfs_debug.h"
58 
/*
 *      transaction management structures
 *
 * TxAnchor is the single anchor for the tblock and tlock free lists and
 * for the wait queues and counters used to throttle transactions.
 * Free lists are chained through the .next index of each table entry;
 * index 0 is reserved and terminates a list.
 */
static struct {
	/* tblock */
	int freetid;		/* index of first free tblock (0 = none free) */
	wait_queue_head_t freewait;	/* txBegin() sleeps here for a free
					   tblock; txEnd() wakes it */

	/* tlock */
	int freelock;		/* index of first free tlock (0 = none free) */
	wait_queue_head_t freelockwait;	/* txLockAlloc() sleeps here;
					   txLockFree() wakes it */
	wait_queue_head_t lowlockwait;	/* txBegin()/txBeginAnon() sleep here
					   while TlocksLow is set */
	int tlocksInUse;	/* Number of tlocks in use */
	int TlocksLow;		/* Set when tlocksInUse exceeds TxLockHWM,
				   cleared when it drops below TxLockLWM */
	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
/*	struct tblock *sync_queue; * Transactions waiting for data sync */
	struct tblock *unlock_queue;	/* Txns waiting to be released */
	struct tblock *unlock_tail;	/* Tail of unlock_queue */
	struct list_head anon_list;	/* inodes having anonymous txns */
	struct list_head anon_list2;	/* inodes having anonymous txns
					   that couldn't be sync'ed */
} TxAnchor;
81 
/*
 * Event counters, compiled in only with CONFIG_JFS_STATISTICS.
 * Each field is bumped through INCREMENT() at the point described.
 */
#ifdef CONFIG_JFS_STATISTICS
struct {
	uint txBegin;			/* calls to txBegin() */
	uint txBegin_barrier;		/* txBegin() slept on logsync barrier
					   or quiesce */
	uint txBegin_lockslow;		/* txBegin() slept because TlocksLow */
	uint txBegin_freetid;		/* txBegin() slept for a free tblock */
	uint txBeginAnon;		/* calls to txBeginAnon() */
	uint txBeginAnon_barrier;	/* txBeginAnon() slept on logsync
					   barrier or quiesce */
	uint txBeginAnon_lockslow;	/* txBeginAnon() slept because
					   TlocksLow */
	uint txLockAlloc;		/* calls to txLockAlloc() */
	uint txLockAlloc_freelock;	/* txLockAlloc() found the free list
					   empty on entry */
} TxStat;
#endif
95 
/*
 * Table sizes and tlock water marks.
 *
 * The water marks are 40% and 80% of the tlock table size.  They are
 * computed with integer arithmetic (no floating-point constants in kernel
 * code, no implicit double -> int truncation); the resulting values,
 * 1638 and 3276, are the same as before.  The literal 4096 must be kept
 * in step with nTxLock.
 */
static int nTxBlock = 512;	/* number of transaction blocks */
struct tblock *TxBlock;	        /* transaction block table */

static int nTxLock = 4096;	/* number of transaction locks */
static int TxLockLWM = (4096 * 4) / 10;	/* Low water mark for number of txLocks used */
static int TxLockHWM = (4096 * 8) / 10;	/* High water mark for number of txLocks used */
struct tlock *TxLock;           /* transaction lock table */
103 
104 
105 /*
106  *      transaction management lock
107  */
108 static spinlock_t jfsTxnLock = SPIN_LOCK_UNLOCKED;
109 
110 #define TXN_LOCK()              spin_lock(&jfsTxnLock)
111 #define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)
112 
113 #define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
114 #define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
115 #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
116 
117 DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait);
118 DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
119 
/*
 * Retry logic exists outside these macros to protect against spurious wakeups.
 */
/*
 * Sleep uninterruptibly on <event>, releasing TXN_LOCK.
 *
 * Must be entered with TXN_LOCK held.  The task is put on the wait queue
 * and marked TASK_UNINTERRUPTIBLE *before* the lock is dropped, and only
 * then calls schedule().  TXN_LOCK is NOT re-acquired on return (use
 * TXN_SLEEP() for that), and the caller must re-test its condition.
 */
static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(event, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	TXN_UNLOCK();
	schedule();
	/* use the accessor rather than writing current->state directly */
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(event, &wait);
}
134 
/*
 * TXN_SLEEP(event): sleep on <event> with TXN_LOCK dropped, then take
 * TXN_LOCK again before continuing.  Enter with TXN_LOCK held.
 *
 * Wrapped in do { } while (0) so that "TXN_SLEEP(x);" is a single
 * statement and is safe as the body of an unbraced if/else or loop.
 */
#define TXN_SLEEP(event)\
do {\
	TXN_SLEEP_DROP_LOCK(event);\
	TXN_LOCK();\
} while (0)

/* wake every task sleeping on <event> */
#define TXN_WAKEUP(event) wake_up_all(event)
142 
143 
/*
 *      statistics
 *
 * Always compiled in (unlike TxStat).  The leading "4:" in each comment
 * is presumably the field size in bytes -- TODO confirm.
 */
struct {
	tid_t maxtid;		/* 4: biggest tid ever used (txBegin) */
	lid_t maxlid;		/* 4: biggest lid ever used (txLockAlloc) */
	int ntid;		/* 4: # of transactions performed */
	int nlid;		/* 4: # of tlocks acquired */
	int waitlock;		/* 4: # of tlock wait (txLock waitLock path) */
} stattx;
154 
155 
/*
 * external references
 *
 * Functions and variables defined outside this file.
 */
extern int lmGroupCommit(struct jfs_log *, struct tblock *);
extern void lmSync(struct jfs_log *);
extern int jfs_commit_inode(struct inode *, int);
extern int jfs_stop_threads;

/* defined here (not extern); presumably the commit thread's task -- TODO confirm */
struct task_struct *jfsCommitTask;
extern struct completion jfsIOwait;
166 
/*
 * forward references
 *
 * Prototypes for file-local helpers used before their definitions.
 * txRelease() is defined in the transaction lock management section
 * below; the other definitions are not part of the portion reviewed here.
 */
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck, struct commit * cd);
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
		struct tblock * tblk);
static void txForce(struct tblock * tblk);
static int txLog(struct jfs_log * log, struct tblock * tblk,
		struct commit * cd);
static void txUpdateMap(struct tblock * tblk);
static void txRelease(struct tblock * tblk);
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	   struct tlock * tlck);
static void LogSyncRelease(struct metapage * mp);
188 
189 /*
190  *              transaction block/lock management
191  *              ---------------------------------
192  */
193 
194 /*
195  * Get a transaction lock from the free list.  If the number in use is
196  * greater than the high water mark, wake up the sync daemon.  This should
197  * free some anonymous transaction locks.  (TXN_LOCK must be held.)
198  */
txLockAlloc(void)199 static lid_t txLockAlloc(void)
200 {
201 	lid_t lid;
202 
203 	INCREMENT(TxStat.txLockAlloc);
204 	if (!TxAnchor.freelock) {
205 		INCREMENT(TxStat.txLockAlloc_freelock);
206 	}
207 
208 	while (!(lid = TxAnchor.freelock))
209 		TXN_SLEEP(&TxAnchor.freelockwait);
210 	TxAnchor.freelock = TxLock[lid].next;
211 	HIGHWATERMARK(stattx.maxlid, lid);
212 	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (TxAnchor.TlocksLow == 0)) {
213 		jfs_info("txLockAlloc TlocksLow");
214 		TxAnchor.TlocksLow = 1;
215 		wake_up(&jfs_sync_thread_wait);
216 	}
217 
218 	return lid;
219 }
220 
/*
 * txLockFree()
 *
 * Return tlock <lid> to the head of the free list and wake anyone
 * sleeping in txLockAlloc().  If the low-tlock condition was flagged and
 * the in-use count has now dropped below TxLockLWM, clear it and wake the
 * transactions held back on TxAnchor.lowlockwait.
 *
 * The caller in this file, txUnlock(), invokes this with TXN_LOCK held.
 */
static void txLockFree(lid_t lid)
{
	/* push onto the free list */
	TxLock[lid].next = TxAnchor.freelock;
	TxAnchor.freelock = lid;

	--TxAnchor.tlocksInUse;
	if (TxAnchor.TlocksLow) {
		if (TxAnchor.tlocksInUse < TxLockLWM) {
			jfs_info("txLockFree TlocksLow no more");
			TxAnchor.TlocksLow = 0;
			TXN_WAKEUP(&TxAnchor.lowlockwait);
		}
	}

	TXN_WAKEUP(&TxAnchor.freelockwait);
}
233 
234 /*
235  * NAME:        txInit()
236  *
237  * FUNCTION:    initialize transaction management structures
238  *
239  * RETURN:
240  *
241  * serialization: single thread at jfs_init()
242  */
txInit(void)243 int txInit(void)
244 {
245 	int k, size;
246 
247 	/*
248 	 * initialize transaction block (tblock) table
249 	 *
250 	 * transaction id (tid) = tblock index
251 	 * tid = 0 is reserved.
252 	 */
253 	size = sizeof(struct tblock) * nTxBlock;
254 	TxBlock = (struct tblock *) vmalloc(size);
255 	if (TxBlock == NULL)
256 		return -ENOMEM;
257 
258 	for (k = 1; k < nTxBlock - 1; k++) {
259 		TxBlock[k].next = k + 1;
260 		init_waitqueue_head(&TxBlock[k].gcwait);
261 		init_waitqueue_head(&TxBlock[k].waitor);
262 	}
263 	TxBlock[k].next = 0;
264 	init_waitqueue_head(&TxBlock[k].gcwait);
265 	init_waitqueue_head(&TxBlock[k].waitor);
266 
267 	TxAnchor.freetid = 1;
268 	init_waitqueue_head(&TxAnchor.freewait);
269 
270 	stattx.maxtid = 1;	/* statistics */
271 
272 	/*
273 	 * initialize transaction lock (tlock) table
274 	 *
275 	 * transaction lock id = tlock index
276 	 * tlock id = 0 is reserved.
277 	 */
278 	size = sizeof(struct tlock) * nTxLock;
279 	TxLock = (struct tlock *) vmalloc(size);
280 	if (TxLock == NULL) {
281 		vfree(TxBlock);
282 		return -ENOMEM;
283 	}
284 
285 	/* initialize tlock table */
286 	for (k = 1; k < nTxLock - 1; k++)
287 		TxLock[k].next = k + 1;
288 	TxLock[k].next = 0;
289 	init_waitqueue_head(&TxAnchor.freelockwait);
290 	init_waitqueue_head(&TxAnchor.lowlockwait);
291 
292 	TxAnchor.freelock = 1;
293 	TxAnchor.tlocksInUse = 0;
294 	INIT_LIST_HEAD(&TxAnchor.anon_list);
295 	INIT_LIST_HEAD(&TxAnchor.anon_list2);
296 
297 	stattx.maxlid = 1;	/* statistics */
298 
299 	return 0;
300 }
301 
302 /*
303  * NAME:        txExit()
304  *
305  * FUNCTION:    clean up when module is unloaded
306  */
txExit(void)307 void txExit(void)
308 {
309 	vfree(TxLock);
310 	TxLock = 0;
311 	vfree(TxBlock);
312 	TxBlock = 0;
313 }
314 
315 
316 /*
317  * NAME:        txBegin()
318  *
319  * FUNCTION:    start a transaction.
320  *
321  * PARAMETER:   sb	- superblock
322  *              flag	- force for nested tx;
323  *
324  * RETURN:	tid	- transaction id
325  *
326  * note: flag force allows to start tx for nested tx
327  * to prevent deadlock on logsync barrier;
328  */
txBegin(struct super_block * sb,int flag)329 tid_t txBegin(struct super_block *sb, int flag)
330 {
331 	tid_t t;
332 	struct tblock *tblk;
333 	struct jfs_log *log;
334 
335 	jfs_info("txBegin: flag = 0x%x", flag);
336 	log = JFS_SBI(sb)->log;
337 
338 	TXN_LOCK();
339 
340 	INCREMENT(TxStat.txBegin);
341 
342       retry:
343 	if (!(flag & COMMIT_FORCE)) {
344 		/*
345 		 * synchronize with logsync barrier
346 		 */
347 		if (test_bit(log_SYNCBARRIER, &log->flag) ||
348 		    test_bit(log_QUIESCE, &log->flag)) {
349 			INCREMENT(TxStat.txBegin_barrier);
350 			TXN_SLEEP(&log->syncwait);
351 			goto retry;
352 		}
353 	}
354 	if (flag == 0) {
355 		/*
356 		 * Don't begin transaction if we're getting starved for tlocks
357 		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
358 		 * free tlocks)
359 		 */
360 		if (TxAnchor.TlocksLow) {
361 			INCREMENT(TxStat.txBegin_lockslow);
362 			TXN_SLEEP(&TxAnchor.lowlockwait);
363 			goto retry;
364 		}
365 	}
366 
367 	/*
368 	 * allocate transaction id/block
369 	 */
370 	if ((t = TxAnchor.freetid) == 0) {
371 		jfs_info("txBegin: waiting for free tid");
372 		INCREMENT(TxStat.txBegin_freetid);
373 		TXN_SLEEP(&TxAnchor.freewait);
374 		goto retry;
375 	}
376 
377 	tblk = tid_to_tblock(t);
378 
379 	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
380 		/* Don't let a non-forced transaction take the last tblk */
381 		jfs_info("txBegin: waiting for free tid");
382 		INCREMENT(TxStat.txBegin_freetid);
383 		TXN_SLEEP(&TxAnchor.freewait);
384 		goto retry;
385 	}
386 
387 	TxAnchor.freetid = tblk->next;
388 
389 	/*
390 	 * initialize transaction
391 	 */
392 
393 	/*
394 	 * We can't zero the whole thing or we screw up another thread being
395 	 * awakened after sleeping on tblk->waitor
396 	 *
397 	 * memset(tblk, 0, sizeof(struct tblock));
398 	 */
399 	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
400 
401 	tblk->sb = sb;
402 	++log->logtid;
403 	tblk->logtid = log->logtid;
404 
405 	++log->active;
406 
407 	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
408 	INCREMENT(stattx.ntid);	/* statistics */
409 
410 	TXN_UNLOCK();
411 
412 	jfs_info("txBegin: returning tid = %d", t);
413 
414 	return t;
415 }
416 
417 
418 /*
419  * NAME:        txBeginAnon()
420  *
421  * FUNCTION:    start an anonymous transaction.
422  *		Blocks if logsync or available tlocks are low to prevent
423  *		anonymous tlocks from depleting supply.
424  *
425  * PARAMETER:   sb	- superblock
426  *
427  * RETURN:	none
428  */
txBeginAnon(struct super_block * sb)429 void txBeginAnon(struct super_block *sb)
430 {
431 	struct jfs_log *log;
432 
433 	log = JFS_SBI(sb)->log;
434 
435 	TXN_LOCK();
436 	INCREMENT(TxStat.txBeginAnon);
437 
438       retry:
439 	/*
440 	 * synchronize with logsync barrier
441 	 */
442 	if (test_bit(log_SYNCBARRIER, &log->flag) ||
443 	    test_bit(log_QUIESCE, &log->flag)) {
444 		INCREMENT(TxStat.txBeginAnon_barrier);
445 		TXN_SLEEP(&log->syncwait);
446 		goto retry;
447 	}
448 
449 	/*
450 	 * Don't begin transaction if we're getting starved for tlocks
451 	 */
452 	if (TxAnchor.TlocksLow) {
453 		INCREMENT(TxStat.txBeginAnon_lockslow);
454 		TXN_SLEEP(&TxAnchor.lowlockwait);
455 		goto retry;
456 	}
457 	TXN_UNLOCK();
458 }
459 
460 
461 /*
462  *      txEnd()
463  *
464  * function: free specified transaction block.
465  *
466  *      logsync barrier processing:
467  *
468  * serialization:
469  */
txEnd(tid_t tid)470 void txEnd(tid_t tid)
471 {
472 	struct tblock *tblk = tid_to_tblock(tid);
473 	struct jfs_log *log;
474 
475 	jfs_info("txEnd: tid = %d", tid);
476 	TXN_LOCK();
477 
478 	/*
479 	 * wakeup transactions waiting on the page locked
480 	 * by the current transaction
481 	 */
482 	TXN_WAKEUP(&tblk->waitor);
483 
484 	log = JFS_SBI(tblk->sb)->log;
485 
486 	/*
487 	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
488 	 * otherwise, we would be left with a transaction that may have been
489 	 * reused.
490 	 *
491 	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
492 	 * routine.
493 	 */
494 	if (tblk->flag & tblkGC_LAZY) {
495 		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
496 		TXN_UNLOCK();
497 
498 		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
499 		tblk->flag |= tblkGC_UNLOCKED;
500 		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
501 		return;
502 	}
503 
504 	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
505 
506 	assert(tblk->next == 0);
507 
508 	/*
509 	 * insert tblock back on freelist
510 	 */
511 	tblk->next = TxAnchor.freetid;
512 	TxAnchor.freetid = tid;
513 
514 	/*
515 	 * mark the tblock not active
516 	 */
517 	if (--log->active == 0) {
518 		clear_bit(log_FLUSH, &log->flag);
519 
520 		/*
521 		 * synchronize with logsync barrier
522 		 */
523 		if (test_bit(log_SYNCBARRIER, &log->flag)) {
524 			/* forward log syncpt */
525 			/* lmSync(log); */
526 
527 			jfs_info("log barrier off: 0x%x", log->lsn);
528 
529 			/* enable new transactions start */
530 			clear_bit(log_SYNCBARRIER, &log->flag);
531 
532 			/* wakeup all waitors for logsync barrier */
533 			TXN_WAKEUP(&log->syncwait);
534 		}
535 	}
536 
537 	/*
538 	 * wakeup all waitors for a free tblock
539 	 */
540 	TXN_WAKEUP(&TxAnchor.freewait);
541 
542 	TXN_UNLOCK();
543 }
544 
545 
/*
 *      txLock()
 *
 * function: acquire a transaction lock on the specified <mp>
 *
 * parameter:
 *	tid	- requesting transaction; 0 means anonymous transaction
 *	ip	- inode the page belongs to
 *	mp	- page to lock; when COMMIT_PAGE is not set in mp->xflag
 *		  the tlock is for the in-memory inode
 *	type	- tlck* type bits; OR'ed into the tlock's type
 *
 * return:      pointer to the tlock when the lock is granted;
 *		NULL when the page is locked by another transaction.  In
 *		that case <mp> has been released and the caller has slept
 *		on the holder's waitor queue (presumably the caller is
 *		expected to retry -- TODO confirm against callers).
 *
 * serialization: takes TXN_LOCK internally; it is dropped on every
 *		return path.
 */
struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
		     int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int dir_xtree = 0;	/* set when the lid lives in jfs_ip->xtlid */
	lid_t lid;
	tid_t xtid;		/* tid currently holding the tlock */
	struct tlock *tlck;
	struct xtlock *xtlck;
	struct linelock *linelock;
	xtpage_t *p;
	struct tblock *tblk;

	assert(!test_cflag(COMMIT_Nolink, ip));

	TXN_LOCK();

	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
	    !(mp->xflag & COMMIT_PAGE)) {
		/*
		 * Directory inode is special.  It can have both an xtree tlock
		 * and a dtree tlock associated with it.
		 */
		dir_xtree = 1;
		lid = jfs_ip->xtlid;
	} else
		lid = mp->lid;

	/* is page not locked by a transaction ? */
	if (lid == 0)
		goto allocateLock;

	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);

	/* is page locked by the requester transaction ? */
	tlck = lid_to_tlock(lid);
	if ((xtid = tlck->tid) == tid)
		goto grantLock;

	/*
	 * is page locked by anonymous transaction/lock ?
	 *
	 * (page update without transaction (i.e., file write) is
	 * locked under anonymous transaction tid = 0:
	 * anonymous tlocks maintained on anonymous tlock list of
	 * the inode of the page and available to all anonymous
	 * transactions until txCommit() time at which point
	 * they are transferred to the transaction tlock list of
	 * the committing transaction of the inode)
	 */
	if (xtid == 0) {
		/* the requester takes over the anonymous tlock */
		tlck->tid = tid;
		tblk = tid_to_tblock(tid);
		/*
		 * The order of the tlocks in the transaction is important
		 * (during truncate, child xtree pages must be freed before
		 * parent's tlocks change the working map).
		 * Take tlock off anonymous list and add to tail of
		 * transaction list
		 *
		 * Note:  We really need to get rid of the tid & lid and
		 * use list_head's.  This code is getting UGLY!
		 */
		if (jfs_ip->atlhead == lid) {
			if (jfs_ip->atltail == lid) {
				/* only anonymous txn.
				 * Remove from anon_list
				 */
				list_del_init(&jfs_ip->anon_inode_list);
			}
			jfs_ip->atlhead = tlck->next;
		} else {
			/* walk the singly linked list to find the predecessor */
			lid_t last;
			for (last = jfs_ip->atlhead;
			     lid_to_tlock(last)->next != lid;
			     last = lid_to_tlock(last)->next) {
				assert(last);
			}
			lid_to_tlock(last)->next = tlck->next;
			if (jfs_ip->atltail == lid)
				jfs_ip->atltail = last;
		}

		/* insert the tlock at tail of transaction tlock list */

		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;

		goto grantLock;
	}

	/* held by a different, non-anonymous transaction */
	goto waitLock;

	/*
	 * allocate a tlock
	 */
      allocateLock:
	lid = txLockAlloc();	/* may sleep, dropping TXN_LOCK meanwhile */
	tlck = lid_to_tlock(lid);

	/*
	 * initialize tlock
	 */
	tlck->tid = tid;

	/* mark tlock for meta-data page */
	if (mp->xflag & COMMIT_PAGE) {

		tlck->flag = tlckPAGELOCK;

		/* mark the page dirty and nohomeok */
		mark_metapage_dirty(mp);
		atomic_inc(&mp->nohomeok);

		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
			 mp, atomic_read(&mp->nohomeok), tid, tlck);

		/* if anonymous transaction, and buffer is on the group
		 * commit synclist, mark inode to show this.  This will
		 * prevent the buffer from being marked nohomeok for too
		 * long a time.
		 */
		if ((tid == 0) && mp->lsn)
			set_cflag(COMMIT_Synclist, ip);
	}
	/* mark tlock for in-memory inode */
	else
		tlck->flag = tlckINODELOCK;

	/* type bits are OR'ed in at grantLock below */
	tlck->type = 0;

	/* bind the tlock and the page */
	tlck->ip = ip;
	tlck->mp = mp;
	if (dir_xtree)
		jfs_ip->xtlid = lid;
	else
		mp->lid = lid;

	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}
	/* anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
		tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
			jfs_ip->atltail = lid;
			list_add_tail(&jfs_ip->anon_inode_list,
				      &TxAnchor.anon_list);
		}
	}

	/* initialize type dependent area for linelock */
	linelock = (struct linelock *) & tlck->lock;
	linelock->next = 0;
	linelock->flag = tlckLINELOCK;
	linelock->maxcnt = TLOCKSHORT;
	linelock->index = 0;

	/* the log line (slot) size depends on the kind of object locked */
	switch (type & tlckTYPE) {
	case tlckDTREE:
		linelock->l2linesize = L2DTSLOTSIZE;
		break;

	case tlckXTREE:
		linelock->l2linesize = L2XTSLOTSIZE;

		xtlck = (struct xtlock *) linelock;
		xtlck->header.offset = 0;
		xtlck->header.length = 2;

		if (type & tlckNEW) {
			xtlck->lwm.offset = XTENTRYSTART;
		} else {
			/* existing page: start the low water mark at the
			 * page's current nextindex
			 */
			if (mp->xflag & COMMIT_PAGE)
				p = (xtpage_t *) mp->data;
			else
				p = &jfs_ip->i_xtroot;
			xtlck->lwm.offset =
			    le16_to_cpu(p->header.nextindex);
		}
		xtlck->lwm.length = 0;	/* ! */
		xtlck->twm.offset = 0;
		xtlck->hwm.offset = 0;

		xtlck->index = 2;
		break;

	case tlckINODE:
		linelock->l2linesize = L2INODESLOTSIZE;
		break;

	case tlckDATA:
		linelock->l2linesize = L2DATASLOTSIZE;
		break;

	default:
		/* unknown type: logged, l2linesize is left unset */
		jfs_err("UFO tlock:0x%p", tlck);
	}

	/*
	 * update tlock vector
	 */
      grantLock:
	tlck->type |= type;

	TXN_UNLOCK();

	return tlck;

	/*
	 * page is being locked by another transaction:
	 */
      waitLock:
	/* Only locks on ipimap or ipaimap should reach here */
	/* assert(jfs_ip->fileset == AGGREGATE_I); */
	if (jfs_ip->fileset != AGGREGATE_I) {
		jfs_err("txLock: trying to lock locked page!");
		dump_mem("ip", ip, sizeof(struct inode));
		dump_mem("mp", mp, sizeof(struct metapage));
		dump_mem("Locker's tblk", tid_to_tblock(tid),
			 sizeof(struct tblock));
		dump_mem("Tlock", tlck, sizeof(struct tlock));
		BUG();
	}
	INCREMENT(stattx.waitlock);	/* statistics */
	release_metapage(mp);

	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
		 tid, xtid, lid);
	/* sleeps on the holder's queue; TXN_LOCK is not re-acquired */
	TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);

	return NULL;
}
810 
811 
812 /*
813  * NAME:        txRelease()
814  *
815  * FUNCTION:    Release buffers associated with transaction locks, but don't
816  *		mark homeok yet.  The allows other transactions to modify
817  *		buffers, but won't let them go to disk until commit record
818  *		actually gets written.
819  *
820  * PARAMETER:
821  *              tblk    -
822  *
823  * RETURN:      Errors from subroutines.
824  */
txRelease(struct tblock * tblk)825 static void txRelease(struct tblock * tblk)
826 {
827 	struct metapage *mp;
828 	lid_t lid;
829 	struct tlock *tlck;
830 
831 	TXN_LOCK();
832 
833 	for (lid = tblk->next; lid; lid = tlck->next) {
834 		tlck = lid_to_tlock(lid);
835 		if ((mp = tlck->mp) != NULL &&
836 		    (tlck->type & tlckBTROOT) == 0) {
837 			assert(mp->xflag & COMMIT_PAGE);
838 			mp->lid = 0;
839 		}
840 	}
841 
842 	/*
843 	 * wakeup transactions waiting on a page locked
844 	 * by the current transaction
845 	 */
846 	TXN_WAKEUP(&tblk->waitor);
847 
848 	TXN_UNLOCK();
849 }
850 
851 
852 /*
853  * NAME:        txUnlock()
854  *
855  * FUNCTION:    Initiates pageout of pages modified by tid in journalled
856  *              objects and frees their lockwords.
857  */
txUnlock(struct tblock * tblk)858 static void txUnlock(struct tblock * tblk)
859 {
860 	struct tlock *tlck;
861 	struct linelock *linelock;
862 	lid_t lid, next, llid, k;
863 	struct metapage *mp;
864 	struct jfs_log *log;
865 	int difft, diffp;
866 
867 	jfs_info("txUnlock: tblk = 0x%p", tblk);
868 	log = JFS_SBI(tblk->sb)->log;
869 
870 	/*
871 	 * mark page under tlock homeok (its log has been written):
872 	 */
873 	for (lid = tblk->next; lid; lid = next) {
874 		tlck = lid_to_tlock(lid);
875 		next = tlck->next;
876 
877 		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
878 
879 		/* unbind page from tlock */
880 		if ((mp = tlck->mp) != NULL &&
881 		    (tlck->type & tlckBTROOT) == 0) {
882 			assert(mp->xflag & COMMIT_PAGE);
883 
884 			/* hold buffer
885 			 *
886 			 * It's possible that someone else has the metapage.
887 			 * The only things were changing are nohomeok, which
888 			 * is handled atomically, and clsn which is protected
889 			 * by the LOGSYNC_LOCK.
890 			 */
891 			hold_metapage(mp, 1);
892 
893 			assert(atomic_read(&mp->nohomeok) > 0);
894 			atomic_dec(&mp->nohomeok);
895 
896 			/* inherit younger/larger clsn */
897 			LOGSYNC_LOCK(log);
898 			if (mp->clsn) {
899 				logdiff(difft, tblk->clsn, log);
900 				logdiff(diffp, mp->clsn, log);
901 				if (difft > diffp)
902 					mp->clsn = tblk->clsn;
903 			} else
904 				mp->clsn = tblk->clsn;
905 			LOGSYNC_UNLOCK(log);
906 
907 			assert(!(tlck->flag & tlckFREEPAGE));
908 
909 			if (tlck->flag & tlckWRITEPAGE) {
910 				write_metapage(mp);
911 			} else {
912 				/* release page which has been forced */
913 				release_metapage(mp);
914 			}
915 		}
916 
917 		/* insert tlock, and linelock(s) of the tlock if any,
918 		 * at head of freelist
919 		 */
920 		TXN_LOCK();
921 
922 		llid = ((struct linelock *) & tlck->lock)->next;
923 		while (llid) {
924 			linelock = (struct linelock *) lid_to_tlock(llid);
925 			k = linelock->next;
926 			txLockFree(llid);
927 			llid = k;
928 		}
929 		txLockFree(lid);
930 
931 		TXN_UNLOCK();
932 	}
933 	tblk->next = tblk->last = 0;
934 
935 	/*
936 	 * remove tblock from logsynclist
937 	 * (allocation map pages inherited lsn of tblk and
938 	 * has been inserted in logsync list at txUpdateMap())
939 	 */
940 	if (tblk->lsn) {
941 		LOGSYNC_LOCK(log);
942 		log->count--;
943 		list_del(&tblk->synclist);
944 		LOGSYNC_UNLOCK(log);
945 	}
946 }
947 
948 
949 /*
950  *      txMaplock()
951  *
952  * function: allocate a transaction lock for freed page/entry;
953  *      for freed page, maplock is used as xtlock/dtlock type;
954  */
txMaplock(tid_t tid,struct inode * ip,int type)955 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
956 {
957 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
958 	lid_t lid;
959 	struct tblock *tblk;
960 	struct tlock *tlck;
961 	struct maplock *maplock;
962 
963 	TXN_LOCK();
964 
965 	/*
966 	 * allocate a tlock
967 	 */
968 	lid = txLockAlloc();
969 	tlck = lid_to_tlock(lid);
970 
971 	/*
972 	 * initialize tlock
973 	 */
974 	tlck->tid = tid;
975 
976 	/* bind the tlock and the object */
977 	tlck->flag = tlckINODELOCK;
978 	tlck->ip = ip;
979 	tlck->mp = NULL;
980 
981 	tlck->type = type;
982 
983 	/*
984 	 * enqueue transaction lock to transaction/inode
985 	 */
986 	/* insert the tlock at tail of transaction tlock list */
987 	if (tid) {
988 		tblk = tid_to_tblock(tid);
989 		if (tblk->next)
990 			lid_to_tlock(tblk->last)->next = lid;
991 		else
992 			tblk->next = lid;
993 		tlck->next = 0;
994 		tblk->last = lid;
995 	}
996 	/* anonymous transaction:
997 	 * insert the tlock at head of inode anonymous tlock list
998 	 */
999 	else {
1000 		tlck->next = jfs_ip->atlhead;
1001 		jfs_ip->atlhead = lid;
1002 		if (tlck->next == 0) {
1003 			/* This inode's first anonymous transaction */
1004 			jfs_ip->atltail = lid;
1005 			list_add_tail(&jfs_ip->anon_inode_list,
1006 				      &TxAnchor.anon_list);
1007 		}
1008 	}
1009 
1010 	TXN_UNLOCK();
1011 
1012 	/* initialize type dependent area for maplock */
1013 	maplock = (struct maplock *) & tlck->lock;
1014 	maplock->next = 0;
1015 	maplock->maxcnt = 0;
1016 	maplock->index = 0;
1017 
1018 	return tlck;
1019 }
1020 
1021 
1022 /*
1023  *      txLinelock()
1024  *
1025  * function: allocate a transaction lock for log vector list
1026  */
txLinelock(struct linelock * tlock)1027 struct linelock *txLinelock(struct linelock * tlock)
1028 {
1029 	lid_t lid;
1030 	struct tlock *tlck;
1031 	struct linelock *linelock;
1032 
1033 	TXN_LOCK();
1034 
1035 	/* allocate a TxLock structure */
1036 	lid = txLockAlloc();
1037 	tlck = lid_to_tlock(lid);
1038 
1039 	TXN_UNLOCK();
1040 
1041 	/* initialize linelock */
1042 	linelock = (struct linelock *) tlck;
1043 	linelock->next = 0;
1044 	linelock->flag = tlckLINELOCK;
1045 	linelock->maxcnt = TLOCKLONG;
1046 	linelock->index = 0;
1047 
1048 	/* append linelock after tlock */
1049 	linelock->next = tlock->next;
1050 	tlock->next = lid;
1051 
1052 	return linelock;
1053 }
1054 
1055 
1056 
1057 /*
1058  *              transaction commit management
1059  *              -----------------------------
1060  */
1061 
1062 /*
1063  * NAME:        txCommit()
1064  *
1065  * FUNCTION:    commit the changes to the objects specified in
1066  *              clist.  For journalled segments only the
1067  *              changes of the caller are committed, ie by tid.
1068  *              for non-journalled segments the data are flushed to
1069  *              disk and then the change to the disk inode and indirect
1070  *              blocks committed (so blocks newly allocated to the
1071  *              segment will be made a part of the segment atomically).
1072  *
1073  *              all of the segments specified in clist must be in
1074  *              one file system. no more than 6 segments are needed
1075  *              to handle all unix svcs.
1076  *
1077  *              if the i_nlink field (i.e. disk inode link count)
1078  *              is zero, and the type of inode is a regular file or
1079  *              directory, or symbolic link , the inode is truncated
1080  *              to zero length. the truncation is committed but the
1081  *              VM resources are unaffected until it is closed (see
1082  *              iput and iclose).
1083  *
1084  * PARAMETER:
1085  *
1086  * RETURN:
1087  *
1088  * serialization:
1089  *              on entry the inode lock on each segment is assumed
1090  *              to be held.
1091  *
1092  * i/o error:
1093  */
int txCommit(tid_t tid,		/* transaction identifier */
	     int nip,		/* number of inodes to commit */
	     struct inode **iplist,	/* list of inode to commit */
	     int flag)
{
	/*
	 * Commit sequence, in execution order:
	 *  1. refuse the commit on a read-only file system (-EROFS);
	 *  2. begin a transaction when called with the anonymous tid 0;
	 *  3. for every inode, highest inode number first: flush file
	 *     data when COMMIT_Syncdata was set, adopt the inode's
	 *     anonymous tlocks and call diWrite();
	 *  4. txLog() writes the log records of all tlocks;
	 *  5. the LOG_COMMIT record is written and the tblock is handed
	 *     to lmGroupCommit();
	 *  6. txForce()/txUpdateMap() when COMMIT_FORCE applies, then
	 *     txRelease(), txUnlock() (unless tblkGC_LAZY is set) and
	 *     the reset of the per-inode commit state.
	 *
	 * Returns 0, -EROFS, or the error reported by diWrite()/txLog().
	 * Only a diWrite() failure goes through txAbort().
	 */
	int rc = 0;
	struct commit cd;
	struct jfs_log *log;
	struct tblock *tblk;
	struct lrd *lrd;
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	int k, n;
	ino_t top;
	struct super_block *sb;

	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);

	/* nothing can be committed to a read-only file system */
	if (isReadOnly(iplist[0])) {
		rc = -EROFS;
		goto TheEnd;
	}

	sb = iplist[0]->i_sb;
	cd.sb = sb;
	/* cd.tid keeps the tid passed by the caller, even when it is 0 */
	cd.tid = tid;

	/* anonymous caller (tid 0): start a transaction on its behalf */
	if (tid == 0)
		tid = txBegin(sb, 0);
	tblk = tid_to_tblock(tid);

	/*
	 * initialize commit structure
	 */
	log = JFS_SBI(sb)->log;
	cd.log = log;
	cd.iplist = iplist;
	cd.nip = nip;

	/* log record descriptor reused for every record of this commit */
	lrd = &cd.lrd;
	lrd->logtid = cpu_to_le32(tblk->logtid);
	lrd->backchain = 0;

	/* remember the commit flags; lazy unless FORCE or SYNC was asked */
	tblk->xflag |= flag;
	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
		tblk->xflag |= COMMIT_LAZY;

	/*
	 *      acquire transaction lock on (on-disk) inodes
	 *
	 * The inode array is sorted by inode number in descending order
	 * while it is walked, so that concurrent transactions acquire the
	 * tlocks of on-disk inode pages in the same order and cannot
	 * deadlock on each other.
	 */
	for (k = 0; k < cd.nip; k++) {
		/* selection step: move the largest remaining i_ino to slot k */
		top = (cd.iplist[k])->i_ino;
		for (n = k + 1; n < cd.nip; n++) {
			ip = cd.iplist[n];
			if (ip->i_ino > top) {
				top = ip->i_ino;
				cd.iplist[n] = cd.iplist[k];
				cd.iplist[k] = ip;
			}
		}

		ip = cd.iplist[k];
		jfs_ip = JFS_IP(ip);

		/*
		 * flush data pages of a non-journaled file so that it does
		 * not end up with non-initialized disk blocks after a crash.
		 *
		 * NOTE(review): COMMIT_DELETE is tested against tblk->flag
		 * here while every other COMMIT_* test in this function uses
		 * tblk->xflag - confirm which field is intended.
		 */
		if (test_and_clear_cflag(COMMIT_Syncdata, ip) &&
		    ((tblk->flag & COMMIT_DELETE) == 0))
			fsync_inode_data_buffers(ip);

		/*
		 * Mark inode as not dirty.  It will still be on the dirty
		 * inode list, but we'll know not to commit it again unless
		 * it gets marked dirty again
		 */
		clear_cflag(COMMIT_Dirty, ip);

		/* splice the inode's anonymous tlock list in front of ours */
		if (jfs_ip->atlhead) {
			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
			tblk->next = jfs_ip->atlhead;
			if (!tblk->last)
				tblk->last = jfs_ip->atltail;
			jfs_ip->atlhead = jfs_ip->atltail = 0;
			TXN_LOCK();
			list_del_init(&jfs_ip->anon_inode_list);
			TXN_UNLOCK();
		}

		/*
		 * acquire transaction lock on on-disk inode page
		 * (become first tlock of the tblk's tlock list)
		 */
		rc = diWrite(tid, ip);
		if (rc)
			goto out;
	}

	/*
	 *      write log records from transaction locks
	 */
	rc = txLog(log, tblk, &cd);
	if (rc)
		goto TheEnd;

	/*
	 * Ensure that inode isn't reused before
	 * lazy commit thread finishes processing
	 */
	if (tblk->xflag & (COMMIT_CREATE | COMMIT_DELETE)) {
		atomic_inc(&tblk->ip->i_count);
		/*
		 * Avoid a rare deadlock
		 *
		 * If the inode is locked, we may be blocked in
		 * jfs_commit_inode.  If so, we don't want the
		 * lazy_commit thread doing the last iput() on the inode
		 * since that may block on the locked inode.  Instead,
		 * commit the transaction synchronously, so the last iput
		 * will be done by the calling thread (or later)
		 */
		if (tblk->ip->i_state & I_LOCK)
			tblk->xflag &= ~COMMIT_LAZY;
	}

	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
	       ((tblk->ip->i_nlink == 0) &&
		!test_cflag(COMMIT_Nolink, tblk->ip)));

	/*
	 *      write COMMIT log record
	 *
	 * (the value returned by lmLog() is not needed here)
	 */
	lrd->type = cpu_to_le16(LOG_COMMIT);
	lrd->length = 0;
	lmLog(log, tblk, lrd, NULL);

	lmGroupCommit(log, tblk);

	/*
	 *      - transaction is now committed -
	 */

	/*
	 * force pages in careful update
	 * (imap addressing structure update)
	 */
	if (flag & COMMIT_FORCE)
		txForce(tblk);

	/*
	 *      update allocation map.
	 *
	 * update inode allocation map and inode:
	 * free pager lock on memory object of inode if any.
	 * update  block allocation map.
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
	if (tblk->xflag & COMMIT_FORCE)
		txUpdateMap(tblk);

	/*
	 *      free transaction locks and pageout/free pages
	 */
	txRelease(tblk);

	if ((tblk->flag & tblkGC_LAZY) == 0)
		txUnlock(tblk);

	/*
	 *      reset in-memory object state
	 */
	for (k = 0; k < cd.nip; k++) {
		jfs_ip = JFS_IP(cd.iplist[k]);
		jfs_ip->bxflag = 0;
		jfs_ip->blid = 0;
	}

      out:
	if (rc != 0)
		txAbort(tid, 1);

      TheEnd:
	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
	return rc;
}
1303 
1304 
1305 /*
1306  * NAME:        txLog()
1307  *
1308  * FUNCTION:    Writes AFTER log records for all lines modified
1309  *              by tid for segments specified by inodes in comdata.
1310  *              Code assumes only WRITELOCKS are recorded in lockwords.
1311  *
1312  * PARAMETERS:
1313  *
1314  * RETURN :
1315  */
txLog(struct jfs_log * log,struct tblock * tblk,struct commit * cd)1316 static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1317 {
1318 	int rc = 0;
1319 	struct inode *ip;
1320 	lid_t lid;
1321 	struct tlock *tlck;
1322 	struct lrd *lrd = &cd->lrd;
1323 
1324 	/*
1325 	 * write log record(s) for each tlock of transaction,
1326 	 */
1327 	for (lid = tblk->next; lid; lid = tlck->next) {
1328 		tlck = lid_to_tlock(lid);
1329 
1330 		tlck->flag |= tlckLOG;
1331 
1332 		/* initialize lrd common */
1333 		ip = tlck->ip;
1334 		lrd->aggregate = cpu_to_le32(kdev_t_to_nr(ip->i_dev));
1335 		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1336 		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1337 
1338 		/* write log record of page from the tlock */
1339 		switch (tlck->type & tlckTYPE) {
1340 		case tlckXTREE:
1341 			xtLog(log, tblk, lrd, tlck);
1342 			break;
1343 
1344 		case tlckDTREE:
1345 			dtLog(log, tblk, lrd, tlck);
1346 			break;
1347 
1348 		case tlckINODE:
1349 			diLog(log, tblk, lrd, tlck, cd);
1350 			break;
1351 
1352 		case tlckMAP:
1353 			mapLog(log, tblk, lrd, tlck);
1354 			break;
1355 
1356 		case tlckDATA:
1357 			dataLog(log, tblk, lrd, tlck);
1358 			break;
1359 
1360 		default:
1361 			jfs_err("UFO tlock:0x%p", tlck);
1362 		}
1363 	}
1364 
1365 	return rc;
1366 }
1367 
1368 
1369 /*
1370  *      diLog()
1371  *
1372  * function:    log inode tlock and format maplock to update bmap;
1373  */
diLog(struct jfs_log * log,struct tblock * tblk,struct lrd * lrd,struct tlock * tlck,struct commit * cd)1374 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1375 	  struct tlock * tlck, struct commit * cd)
1376 {
1377 	int rc = 0;
1378 	struct metapage *mp;
1379 	pxd_t *pxd;
1380 	struct pxd_lock *pxdlock;
1381 
1382 	mp = tlck->mp;
1383 
1384 	/* initialize as REDOPAGE record format */
1385 	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1386 	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1387 
1388 	pxd = &lrd->log.redopage.pxd;
1389 
1390 	/*
1391 	 *      inode after image
1392 	 */
1393 	if (tlck->type & tlckENTRY) {
1394 		/* log after-image for logredo(): */
1395 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1396 //              *pxd = mp->cm_pxd;
1397 		PXDaddress(pxd, mp->index);
1398 		PXDlength(pxd,
1399 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1400 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1401 
1402 		/* mark page as homeward bound */
1403 		tlck->flag |= tlckWRITEPAGE;
1404 	} else if (tlck->type & tlckFREE) {
1405 		/*
1406 		 *      free inode extent
1407 		 *
1408 		 * (pages of the freed inode extent have been invalidated and
1409 		 * a maplock for free of the extent has been formatted at
1410 		 * txLock() time);
1411 		 *
1412 		 * the tlock had been acquired on the inode allocation map page
1413 		 * (iag) that specifies the freed extent, even though the map
1414 		 * page is not itself logged, to prevent pageout of the map
1415 		 * page before the log;
1416 		 */
1417 
1418 		/* log LOG_NOREDOINOEXT of the freed inode extent for
1419 		 * logredo() to start NoRedoPage filters, and to update
1420 		 * imap and bmap for free of the extent;
1421 		 */
1422 		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1423 		/*
1424 		 * For the LOG_NOREDOINOEXT record, we need
1425 		 * to pass the IAG number and inode extent
1426 		 * index (within that IAG) from which the
1427 		 * the extent being released.  These have been
1428 		 * passed to us in the iplist[1] and iplist[2].
1429 		 */
1430 		lrd->log.noredoinoext.iagnum =
1431 		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1432 		lrd->log.noredoinoext.inoext_idx =
1433 		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1434 
1435 		pxdlock = (struct pxd_lock *) & tlck->lock;
1436 		*pxd = pxdlock->pxd;
1437 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1438 
1439 		/* update bmap */
1440 		tlck->flag |= tlckUPDATEMAP;
1441 
1442 		/* mark page as homeward bound */
1443 		tlck->flag |= tlckWRITEPAGE;
1444 	} else
1445 		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1446 #ifdef  _JFS_WIP
1447 	/*
1448 	 *      alloc/free external EA extent
1449 	 *
1450 	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1451 	 * of the extent has been formatted at txLock() time;
1452 	 */
1453 	else {
1454 		assert(tlck->type & tlckEA);
1455 
1456 		/* log LOG_UPDATEMAP for logredo() to update bmap for
1457 		 * alloc of new (and free of old) external EA extent;
1458 		 */
1459 		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1460 		pxdlock = (struct pxd_lock *) & tlck->lock;
1461 		nlock = pxdlock->index;
1462 		for (i = 0; i < nlock; i++, pxdlock++) {
1463 			if (pxdlock->flag & mlckALLOCPXD)
1464 				lrd->log.updatemap.type =
1465 				    cpu_to_le16(LOG_ALLOCPXD);
1466 			else
1467 				lrd->log.updatemap.type =
1468 				    cpu_to_le16(LOG_FREEPXD);
1469 			lrd->log.updatemap.nxd = cpu_to_le16(1);
1470 			lrd->log.updatemap.pxd = pxdlock->pxd;
1471 			lrd->backchain =
1472 			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1473 		}
1474 
1475 		/* update bmap */
1476 		tlck->flag |= tlckUPDATEMAP;
1477 	}
1478 #endif				/* _JFS_WIP */
1479 
1480 	return rc;
1481 }
1482 
1483 
1484 /*
1485  *      dataLog()
1486  *
1487  * function:    log data tlock
1488  */
dataLog(struct jfs_log * log,struct tblock * tblk,struct lrd * lrd,struct tlock * tlck)1489 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1490 	    struct tlock * tlck)
1491 {
1492 	struct metapage *mp;
1493 	pxd_t *pxd;
1494 
1495 	mp = tlck->mp;
1496 
1497 	/* initialize as REDOPAGE record format */
1498 	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1499 	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1500 
1501 	pxd = &lrd->log.redopage.pxd;
1502 
1503 	/* log after-image for logredo(): */
1504 	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1505 
1506 	if (JFS_IP(tlck->ip)->next_index < MAX_INLINE_DIRTABLE_ENTRY) {
1507 		/*
1508 		 * The table has been truncated, we've must have deleted
1509 		 * the last entry, so don't bother logging this
1510 		 */
1511 		mp->lid = 0;
1512 		hold_metapage(mp, 0);
1513 		atomic_dec(&mp->nohomeok);
1514 		discard_metapage(mp);
1515 		tlck->mp = 0;
1516 		return 0;
1517 	}
1518 
1519 	PXDaddress(pxd, mp->index);
1520 	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1521 
1522 	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1523 
1524 	/* mark page as homeward bound */
1525 	tlck->flag |= tlckWRITEPAGE;
1526 
1527 	return 0;
1528 }
1529 
1530 
1531 /*
1532  *      dtLog()
1533  *
1534  * function:    log dtree tlock and format maplock to update bmap;
1535  */
dtLog(struct jfs_log * log,struct tblock * tblk,struct lrd * lrd,struct tlock * tlck)1536 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1537 	   struct tlock * tlck)
1538 {
1539 	struct metapage *mp;
1540 	struct pxd_lock *pxdlock;
1541 	pxd_t *pxd;
1542 
1543 	mp = tlck->mp;
1544 
1545 	/* initialize as REDOPAGE/NOREDOPAGE record format */
1546 	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1547 	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1548 
1549 	pxd = &lrd->log.redopage.pxd;
1550 
1551 	if (tlck->type & tlckBTROOT)
1552 		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1553 
1554 	/*
1555 	 *      page extension via relocation: entry insertion;
1556 	 *      page extension in-place: entry insertion;
1557 	 *      new right page from page split, reinitialized in-line
1558 	 *      root from root page split: entry insertion;
1559 	 */
1560 	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1561 		/* log after-image of the new page for logredo():
1562 		 * mark log (LOG_NEW) for logredo() to initialize
1563 		 * freelist and update bmap for alloc of the new page;
1564 		 */
1565 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1566 		if (tlck->type & tlckEXTEND)
1567 			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1568 		else
1569 			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1570 //              *pxd = mp->cm_pxd;
1571 		PXDaddress(pxd, mp->index);
1572 		PXDlength(pxd,
1573 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1574 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1575 
1576 		/* format a maplock for txUpdateMap() to update bPMAP for
1577 		 * alloc of the new page;
1578 		 */
1579 		if (tlck->type & tlckBTROOT)
1580 			return;
1581 		tlck->flag |= tlckUPDATEMAP;
1582 		pxdlock = (struct pxd_lock *) & tlck->lock;
1583 		pxdlock->flag = mlckALLOCPXD;
1584 		pxdlock->pxd = *pxd;
1585 
1586 		pxdlock->index = 1;
1587 
1588 		/* mark page as homeward bound */
1589 		tlck->flag |= tlckWRITEPAGE;
1590 		return;
1591 	}
1592 
1593 	/*
1594 	 *      entry insertion/deletion,
1595 	 *      sibling page link update (old right page before split);
1596 	 */
1597 	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1598 		/* log after-image for logredo(): */
1599 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1600 		PXDaddress(pxd, mp->index);
1601 		PXDlength(pxd,
1602 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1603 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1604 
1605 		/* mark page as homeward bound */
1606 		tlck->flag |= tlckWRITEPAGE;
1607 		return;
1608 	}
1609 
1610 	/*
1611 	 *      page deletion: page has been invalidated
1612 	 *      page relocation: source extent
1613 	 *
1614 	 *      a maplock for free of the page has been formatted
1615 	 *      at txLock() time);
1616 	 */
1617 	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1618 		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1619 		 * to start NoRedoPage filter and to update bmap for free
1620 		 * of the deletd page
1621 		 */
1622 		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1623 		pxdlock = (struct pxd_lock *) & tlck->lock;
1624 		*pxd = pxdlock->pxd;
1625 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1626 
1627 		/* a maplock for txUpdateMap() for free of the page
1628 		 * has been formatted at txLock() time;
1629 		 */
1630 		tlck->flag |= tlckUPDATEMAP;
1631 	}
1632 	return;
1633 }
1634 
1635 
1636 /*
1637  *      xtLog()
1638  *
1639  * function:    log xtree tlock and format maplock to update bmap;
1640  */
xtLog(struct jfs_log * log,struct tblock * tblk,struct lrd * lrd,struct tlock * tlck)1641 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1642 	   struct tlock * tlck)
1643 {
1644 	struct inode *ip;
1645 	struct metapage *mp;
1646 	xtpage_t *p;
1647 	struct xtlock *xtlck;
1648 	struct maplock *maplock;
1649 	struct xdlistlock *xadlock;
1650 	struct pxd_lock *pxdlock;
1651 	pxd_t *pxd;
1652 	int next, lwm, hwm;
1653 
1654 	ip = tlck->ip;
1655 	mp = tlck->mp;
1656 
1657 	/* initialize as REDOPAGE/NOREDOPAGE record format */
1658 	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1659 	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1660 
1661 	pxd = &lrd->log.redopage.pxd;
1662 
1663 	if (tlck->type & tlckBTROOT) {
1664 		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1665 		p = &JFS_IP(ip)->i_xtroot;
1666 		if (S_ISDIR(ip->i_mode))
1667 			lrd->log.redopage.type |=
1668 			    cpu_to_le16(LOG_DIR_XTREE);
1669 	} else
1670 		p = (xtpage_t *) mp->data;
1671 	next = le16_to_cpu(p->header.nextindex);
1672 
1673 	xtlck = (struct xtlock *) & tlck->lock;
1674 
1675 	maplock = (struct maplock *) & tlck->lock;
1676 	xadlock = (struct xdlistlock *) maplock;
1677 
1678 	/*
1679 	 *      entry insertion/extension;
1680 	 *      sibling page link update (old right page before split);
1681 	 */
1682 	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1683 		/* log after-image for logredo():
1684 		 * logredo() will update bmap for alloc of new/extended
1685 		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1686 		 * after-image of XADlist;
1687 		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1688 		 * applying the after-image to the meta-data page.
1689 		 */
1690 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1691 //              *pxd = mp->cm_pxd;
1692 		PXDaddress(pxd, mp->index);
1693 		PXDlength(pxd,
1694 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1695 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1696 
1697 		/* format a maplock for txUpdateMap() to update bPMAP
1698 		 * for alloc of new/extended extents of XAD[lwm:next)
1699 		 * from the page itself;
1700 		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1701 		 */
1702 		lwm = xtlck->lwm.offset;
1703 		if (lwm == 0)
1704 			lwm = XTPAGEMAXSLOT;
1705 
1706 		if (lwm == next)
1707 			goto out;
1708 		if (lwm > next) {
1709 			jfs_err("xtLog: lwm > next\n");
1710 			goto out;
1711 		}
1712 		tlck->flag |= tlckUPDATEMAP;
1713 		xadlock->flag = mlckALLOCXADLIST;
1714 		xadlock->count = next - lwm;
1715 		if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
1716 			int i;
1717 			/*
1718 			 * Lazy commit may allow xtree to be modified before
1719 			 * txUpdateMap runs.  Copy xad into linelock to
1720 			 * preserve correct data.
1721 			 */
1722 			xadlock->xdlist = &xtlck->pxdlock;
1723 			memcpy(xadlock->xdlist, &p->xad[lwm],
1724 			       sizeof(xad_t) * xadlock->count);
1725 
1726 			for (i = 0; i < xadlock->count; i++)
1727 				p->xad[lwm + i].flag &=
1728 				    ~(XAD_NEW | XAD_EXTENDED);
1729 		} else {
1730 			/*
1731 			 * xdlist will point to into inode's xtree, ensure
1732 			 * that transaction is not committed lazily.
1733 			 */
1734 			xadlock->xdlist = &p->xad[lwm];
1735 			tblk->xflag &= ~COMMIT_LAZY;
1736 		}
1737 		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d "
1738 			 "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count);
1739 
1740 		maplock->index = 1;
1741 
1742 	      out:
1743 		/* mark page as homeward bound */
1744 		tlck->flag |= tlckWRITEPAGE;
1745 
1746 		return;
1747 	}
1748 
1749 	/*
1750 	 *      page deletion: file deletion/truncation (ref. xtTruncate())
1751 	 *
1752 	 * (page will be invalidated after log is written and bmap
1753 	 * is updated from the page);
1754 	 */
1755 	if (tlck->type & tlckFREE) {
1756 		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1757 		 * if page free from file delete, NoRedoFile filter from
1758 		 * inode image of zero link count will subsume NoRedoPage
1759 		 * filters for each page;
1760 		 * if page free from file truncattion, write NoRedoPage
1761 		 * filter;
1762 		 *
1763 		 * upadte of block allocation map for the page itself:
1764 		 * if page free from deletion and truncation, LOG_UPDATEMAP
1765 		 * log for the page itself is generated from processing
1766 		 * its parent page xad entries;
1767 		 */
1768 		/* if page free from file truncation, log LOG_NOREDOPAGE
1769 		 * of the deleted page for logredo() to start NoRedoPage
1770 		 * filter for the page;
1771 		 */
1772 		if (tblk->xflag & COMMIT_TRUNCATE) {
1773 			/* write NOREDOPAGE for the page */
1774 			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1775 			PXDaddress(pxd, mp->index);
1776 			PXDlength(pxd,
1777 				  mp->logical_size >> tblk->sb->
1778 				  s_blocksize_bits);
1779 			lrd->backchain =
1780 			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1781 
1782 			if (tlck->type & tlckBTROOT) {
1783 				/* Empty xtree must be logged */
1784 				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1785 				lrd->backchain =
1786 				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1787 			}
1788 		}
1789 
1790 		/* init LOG_UPDATEMAP of the freed extents
1791 		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1792 		 * for logredo() to update bmap;
1793 		 */
1794 		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1795 		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1796 		xtlck = (struct xtlock *) & tlck->lock;
1797 		hwm = xtlck->hwm.offset;
1798 		lrd->log.updatemap.nxd =
1799 		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1800 		/* reformat linelock for lmLog() */
1801 		xtlck->header.offset = XTENTRYSTART;
1802 		xtlck->header.length = hwm - XTENTRYSTART + 1;
1803 		xtlck->index = 1;
1804 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1805 
1806 		/* format a maplock for txUpdateMap() to update bmap
1807 		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1808 		 * deleted page itself;
1809 		 */
1810 		tlck->flag |= tlckUPDATEMAP;
1811 		xadlock->flag = mlckFREEXADLIST;
1812 		xadlock->count = hwm - XTENTRYSTART + 1;
1813 		if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
1814 			/*
1815 			 * Lazy commit may allow xtree to be modified before
1816 			 * txUpdateMap runs.  Copy xad into linelock to
1817 			 * preserve correct data.
1818 			 */
1819 			xadlock->xdlist = &xtlck->pxdlock;
1820 			memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART],
1821 			       sizeof(xad_t) * xadlock->count);
1822 		} else {
1823 			/*
1824 			 * xdlist will point to into inode's xtree, ensure
1825 			 * that transaction is not committed lazily.
1826 			 */
1827 			xadlock->xdlist = &p->xad[XTENTRYSTART];
1828 			tblk->xflag &= ~COMMIT_LAZY;
1829 		}
1830 		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1831 			 tlck->ip, mp, xadlock->count);
1832 
1833 		maplock->index = 1;
1834 
1835 		/* mark page as invalid */
1836 		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1837 		    && !(tlck->type & tlckBTROOT))
1838 			tlck->flag |= tlckFREEPAGE;
1839 		/*
1840 		   else (tblk->xflag & COMMIT_PMAP)
1841 		   ? release the page;
1842 		 */
1843 		return;
1844 	}
1845 
1846 	/*
1847 	 *      page/entry truncation: file truncation (ref. xtTruncate())
1848 	 *
1849 	 *     |----------+------+------+---------------|
1850 	 *                |      |      |
1851 	 *                |      |     hwm - hwm before truncation
1852 	 *                |     next - truncation point
1853 	 *               lwm - lwm before truncation
1854 	 * header ?
1855 	 */
1856 	if (tlck->type & tlckTRUNCATE) {
1857 		pxd_t tpxd;	/* truncated extent of xad */
1858 		int twm;
1859 
1860 		/*
1861 		 * For truncation the entire linelock may be used, so it would
1862 		 * be difficult to store xad list in linelock itself.
1863 		 * Therefore, we'll just force transaction to be committed
1864 		 * synchronously, so that xtree pages won't be changed before
1865 		 * txUpdateMap runs.
1866 		 */
1867 		tblk->xflag &= ~COMMIT_LAZY;
1868 		lwm = xtlck->lwm.offset;
1869 		if (lwm == 0)
1870 			lwm = XTPAGEMAXSLOT;
1871 		hwm = xtlck->hwm.offset;
1872 		twm = xtlck->twm.offset;
1873 
1874 		/*
1875 		 *      write log records
1876 		 */
1877 		/* log after-image for logredo():
1878 		 *
1879 		 * logredo() will update bmap for alloc of new/extended
1880 		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1881 		 * after-image of XADlist;
1882 		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1883 		 * applying the after-image to the meta-data page.
1884 		 */
1885 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1886 		PXDaddress(pxd, mp->index);
1887 		PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1888 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1889 
1890 		/*
1891 		 * truncate entry XAD[twm == next - 1]:
1892 		 */
1893 		if (twm == next - 1) {
1894 			/* init LOG_UPDATEMAP for logredo() to update bmap for
1895 			 * free of truncated delta extent of the truncated
1896 			 * entry XAD[next - 1]:
1897 			 * (xtlck->pxdlock = truncated delta extent);
1898 			 */
1899 			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1900 			/* assert(pxdlock->type & tlckTRUNCATE); */
1901 			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1902 			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1903 			lrd->log.updatemap.nxd = cpu_to_le16(1);
1904 			lrd->log.updatemap.pxd = pxdlock->pxd;
1905 			tpxd = pxdlock->pxd;	/* save to format maplock */
1906 			lrd->backchain =
1907 			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1908 		}
1909 
1910 		/*
1911 		 * free entries XAD[next:hwm]:
1912 		 */
1913 		if (hwm >= next) {
1914 			/* init LOG_UPDATEMAP of the freed extents
1915 			 * XAD[next:hwm] from the deleted page itself
1916 			 * for logredo() to update bmap;
1917 			 */
1918 			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1919 			lrd->log.updatemap.type =
1920 			    cpu_to_le16(LOG_FREEXADLIST);
1921 			xtlck = (struct xtlock *) & tlck->lock;
1922 			hwm = xtlck->hwm.offset;
1923 			lrd->log.updatemap.nxd =
1924 			    cpu_to_le16(hwm - next + 1);
1925 			/* reformat linelock for lmLog() */
1926 			xtlck->header.offset = next;
1927 			xtlck->header.length = hwm - next + 1;
1928 			xtlck->index = 1;
1929 			lrd->backchain =
1930 			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1931 		}
1932 
1933 		/*
1934 		 *      format maplock(s) for txUpdateMap() to update bmap
1935 		 */
1936 		maplock->index = 0;
1937 
1938 		/*
1939 		 * allocate entries XAD[lwm:next):
1940 		 */
1941 		if (lwm < next) {
1942 			/* format a maplock for txUpdateMap() to update bPMAP
1943 			 * for alloc of new/extended extents of XAD[lwm:next)
1944 			 * from the page itself;
1945 			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1946 			 */
1947 			tlck->flag |= tlckUPDATEMAP;
1948 			xadlock->flag = mlckALLOCXADLIST;
1949 			xadlock->count = next - lwm;
1950 			xadlock->xdlist = &p->xad[lwm];
1951 
1952 			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d "
1953 				 "lwm:%d next:%d",
1954 				 tlck->ip, mp, xadlock->count, lwm, next);
1955 			maplock->index++;
1956 			xadlock++;
1957 		}
1958 
1959 		/*
1960 		 * truncate entry XAD[twm == next - 1]:
1961 		 */
1962 		if (twm == next - 1) {
1963 			struct pxd_lock *pxdlock;
1964 
1965 			/* format a maplock for txUpdateMap() to update bmap
1966 			 * to free truncated delta extent of the truncated
1967 			 * entry XAD[next - 1];
1968 			 * (xtlck->pxdlock = truncated delta extent);
1969 			 */
1970 			tlck->flag |= tlckUPDATEMAP;
1971 			pxdlock = (struct pxd_lock *) xadlock;
1972 			pxdlock->flag = mlckFREEPXD;
1973 			pxdlock->count = 1;
1974 			pxdlock->pxd = tpxd;
1975 
1976 			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
1977 				 "hwm:%d", ip, mp, pxdlock->count, hwm);
1978 			maplock->index++;
1979 			xadlock++;
1980 		}
1981 
1982 		/*
1983 		 * free entries XAD[next:hwm]:
1984 		 */
1985 		if (hwm >= next) {
1986 			/* format a maplock for txUpdateMap() to update bmap
1987 			 * to free extents of XAD[next:hwm] from thedeleted
1988 			 * page itself;
1989 			 */
1990 			tlck->flag |= tlckUPDATEMAP;
1991 			xadlock->flag = mlckFREEXADLIST;
1992 			xadlock->count = hwm - next + 1;
1993 			xadlock->xdlist = &p->xad[next];
1994 
1995 			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d "
1996 				 "next:%d hwm:%d",
1997 				 tlck->ip, mp, xadlock->count, next, hwm);
1998 			maplock->index++;
1999 		}
2000 
2001 		/* mark page as homeward bound */
2002 		tlck->flag |= tlckWRITEPAGE;
2003 	}
2004 	return;
2005 }
2006 
2007 
2008 /*
2009  *      mapLog()
2010  *
2011  * function:    log from maplock of freed data extents;
2012  */
mapLog(struct jfs_log * log,struct tblock * tblk,struct lrd * lrd,struct tlock * tlck)2013 void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2014 	    struct tlock * tlck)
2015 {
2016 	struct pxd_lock *pxdlock;
2017 	int i, nlock;
2018 	pxd_t *pxd;
2019 
2020 	/*
2021 	 *      page relocation: free the source page extent
2022 	 *
2023 	 * a maplock for txUpdateMap() for free of the page
2024 	 * has been formatted at txLock() time saving the src
2025 	 * relocated page address;
2026 	 */
2027 	if (tlck->type & tlckRELOCATE) {
2028 		/* log LOG_NOREDOPAGE of the old relocated page
2029 		 * for logredo() to start NoRedoPage filter;
2030 		 */
2031 		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2032 		pxdlock = (struct pxd_lock *) & tlck->lock;
2033 		pxd = &lrd->log.redopage.pxd;
2034 		*pxd = pxdlock->pxd;
2035 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2036 
2037 		/* (N.B. currently, logredo() does NOT update bmap
2038 		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2039 		 * if page free from relocation, LOG_UPDATEMAP log is
2040 		 * specifically generated now for logredo()
2041 		 * to update bmap for free of src relocated page;
2042 		 * (new flag LOG_RELOCATE may be introduced which will
2043 		 * inform logredo() to start NORedoPage filter and also
2044 		 * update block allocation map at the same time, thus
2045 		 * avoiding an extra log write);
2046 		 */
2047 		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2048 		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2049 		lrd->log.updatemap.nxd = cpu_to_le16(1);
2050 		lrd->log.updatemap.pxd = pxdlock->pxd;
2051 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2052 
2053 		/* a maplock for txUpdateMap() for free of the page
2054 		 * has been formatted at txLock() time;
2055 		 */
2056 		tlck->flag |= tlckUPDATEMAP;
2057 		return;
2058 	}
2059 	/*
2060 
2061 	 * Otherwise it's not a relocate request
2062 	 *
2063 	 */
2064 	else {
2065 		/* log LOG_UPDATEMAP for logredo() to update bmap for
2066 		 * free of truncated/relocated delta extent of the data;
2067 		 * e.g.: external EA extent, relocated/truncated extent
2068 		 * from xtTailgate();
2069 		 */
2070 		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2071 		pxdlock = (struct pxd_lock *) & tlck->lock;
2072 		nlock = pxdlock->index;
2073 		for (i = 0; i < nlock; i++, pxdlock++) {
2074 			if (pxdlock->flag & mlckALLOCPXD)
2075 				lrd->log.updatemap.type =
2076 				    cpu_to_le16(LOG_ALLOCPXD);
2077 			else
2078 				lrd->log.updatemap.type =
2079 				    cpu_to_le16(LOG_FREEPXD);
2080 			lrd->log.updatemap.nxd = cpu_to_le16(1);
2081 			lrd->log.updatemap.pxd = pxdlock->pxd;
2082 			lrd->backchain =
2083 			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2084 			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2085 				 (ulong) addressPXD(&pxdlock->pxd),
2086 				 lengthPXD(&pxdlock->pxd));
2087 		}
2088 
2089 		/* update bmap */
2090 		tlck->flag |= tlckUPDATEMAP;
2091 	}
2092 }
2093 
2094 
2095 /*
2096  *      txEA()
2097  *
2098  * function:    acquire maplock for EA/ACL extents or
2099  *              set COMMIT_INLINE flag;
2100  */
void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
{
	/*
	 * Build the map lock describing an EA change: an ALLOC entry for an
	 * out-of-line new EA extent and/or a FREE entry for the old EA
	 * extent.  Both entries, when present, share a single tlock; the
	 * first entry's index field counts how many entries are in use.
	 * An in-line new EA needs no map lock, only the COMMIT_Inlineea flag.
	 */
	struct tlock *maptlck = NULL;
	struct pxd_lock *head = NULL;	/* first entry, owner of the count */
	struct pxd_lock *slot = NULL;	/* next entry to be filled in */

	/*
	 * New EA: newea may be a completely zeroed entry, so only act when
	 * one of the two flags says there is EA data to commit.
	 */
	if (newea != NULL) {
		if (newea->flag & DXD_EXTENT) {
			maptlck = txMaplock(tid, ip, tlckMAP);
			head = (struct pxd_lock *) &maptlck->lock;
			head->flag = mlckALLOCPXD;
			PXDaddress(&head->pxd, addressDXD(newea));
			PXDlength(&head->pxd, lengthDXD(newea));
			head->index = 1;
			slot = head + 1;
		} else if (newea->flag & DXD_INLINE) {
			set_cflag(COMMIT_Inlineea, ip);
		}
	}

	/*
	 * Old EA: nothing to free when COMMIT_Nolink is set on the inode
	 * or when the old EA does not live in its own extent.
	 */
	if (test_cflag(COMMIT_Nolink, ip) || !(oldea->flag & DXD_EXTENT))
		return;

	if (maptlck == NULL) {
		/* no ALLOC entry was made above; start a fresh map lock */
		maptlck = txMaplock(tid, ip, tlckMAP);
		head = (struct pxd_lock *) &maptlck->lock;
		head->index = 0;
		slot = head;
	}

	slot->flag = mlckFREEPXD;
	PXDaddress(&slot->pxd, addressDXD(oldea));
	PXDlength(&slot->pxd, lengthDXD(oldea));
	head->index++;
}
2146 
2147 
2148 /*
2149  *      txForce()
2150  *
2151  * function: synchronously write pages locked by transaction
2152  *              after txLog() but before txUpdateMap();
2153  */
txForce(struct tblock * tblk)2154 void txForce(struct tblock * tblk)
2155 {
2156 	struct tlock *tlck;
2157 	lid_t lid, next;
2158 	struct metapage *mp;
2159 
2160 	/*
2161 	 * reverse the order of transaction tlocks in
2162 	 * careful update order of address index pages
2163 	 * (right to left, bottom up)
2164 	 */
2165 	tlck = lid_to_tlock(tblk->next);
2166 	lid = tlck->next;
2167 	tlck->next = 0;
2168 	while (lid) {
2169 		tlck = lid_to_tlock(lid);
2170 		next = tlck->next;
2171 		tlck->next = tblk->next;
2172 		tblk->next = lid;
2173 		lid = next;
2174 	}
2175 
2176 	/*
2177 	 * synchronously write the page, and
2178 	 * hold the page for txUpdateMap();
2179 	 */
2180 	for (lid = tblk->next; lid; lid = next) {
2181 		tlck = lid_to_tlock(lid);
2182 		next = tlck->next;
2183 
2184 		if ((mp = tlck->mp) != NULL &&
2185 		    (tlck->type & tlckBTROOT) == 0) {
2186 			assert(mp->xflag & COMMIT_PAGE);
2187 
2188 			if (tlck->flag & tlckWRITEPAGE) {
2189 				tlck->flag &= ~tlckWRITEPAGE;
2190 
2191 				/* do not release page to freelist */
2192 
2193 				/*
2194 				 * The "right" thing to do here is to
2195 				 * synchronously write the metadata.
2196 				 * With the current implementation this
2197 				 * is hard since write_metapage requires
2198 				 * us to kunmap & remap the page.  If we
2199 				 * have tlocks pointing into the metadata
2200 				 * pages, we don't want to do this.  I think
2201 				 * we can get by with synchronously writing
2202 				 * the pages when they are released.
2203 				 */
2204 				assert(atomic_read(&mp->nohomeok));
2205 				set_bit(META_dirty, &mp->flag);
2206 				set_bit(META_sync, &mp->flag);
2207 			}
2208 		}
2209 	}
2210 }
2211 
2212 
2213 /*
2214  *      txUpdateMap()
2215  *
2216  * function:    update persistent allocation map (and working map
2217  *              if appropriate);
2218  *
2219  * parameter:
2220  */
static void txUpdateMap(struct tblock * tblk)
{
	struct inode *ip;
	struct inode *ipimap;
	lid_t lid;
	struct tlock *tlck;
	struct maplock *maplock;
	struct pxd_lock pxdlock;
	int maptype;
	int k, nlock;
	struct metapage *mp = 0;

	ipimap = JFS_SBI(tblk->sb)->ipimap;

	/*
	 * Map selection used for frees on non-directory inodes:
	 * persistent map only if the transaction carries COMMIT_PMAP,
	 * otherwise persistent + working map.
	 */
	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;


	/*
	 *      update block allocation map
	 *
	 * update allocation state in pmap (and wmap) and
	 * update lsn of the pmap page;
	 */
	/*
	 * scan each tlock/page of transaction for block allocation/free:
	 *
	 * for each tlock/page of transaction, update map.
	 *  ? are there tlock for pmap and pwmap at the same time ?
	 */
	for (lid = tblk->next; lid; lid = tlck->next) {
		tlck = lid_to_tlock(lid);

		/* only tlocks flagged tlckUPDATEMAP carry map updates */
		if ((tlck->flag & tlckUPDATEMAP) == 0)
			continue;

		if (tlck->flag & tlckFREEPAGE) {
			/*
			 * Another thread may attempt to reuse freed space
			 * immediately, so we want to get rid of the metapage
			 * before anyone else has a chance to get it.
			 * Lock metapage, update maps, then invalidate
			 * the metapage.
			 */
			mp = tlck->mp;
			ASSERT(mp->xflag & COMMIT_PAGE);
			hold_metapage(mp, 0);
		}

		/*
		 * extent list:
		 * . in-line PXD list:
		 * . out-of-line XAD list:
		 */
		/*
		 * The tlock's lock area is walked as an array of maplock
		 * entries; the first entry's index field gives the count.
		 */
		maplock = (struct maplock *) & tlck->lock;
		nlock = maplock->index;

		for (k = 0; k < nlock; k++, maplock++) {
			/*
			 * allocate blocks in persistent map:
			 *
			 * blocks have been allocated from wmap at alloc time;
			 */
			if (maplock->flag & mlckALLOC) {
				txAllocPMap(ipimap, maplock, tblk);
			}
			/*
			 * free blocks in persistent and working map:
			 * blocks will be freed in pmap and then in wmap;
			 *
			 * ? tblock specifies the PMAP/PWMAP based upon
			 * transaction
			 *
			 * free blocks in persistent map:
			 * blocks will be freed from wmap at last reference
			 * release of the object for regular files;
			 *
			 * Always free blocks from both persistent & working
			 * maps for directories
			 */
			else {	/* (maplock->flag & mlckFREE) */

				if (S_ISDIR(tlck->ip->i_mode))
					txFreeMap(ipimap, maplock,
						  tblk, COMMIT_PWMAP);
				else
					txFreeMap(ipimap, maplock,
						  tblk, maptype);
			}
		}
		/*
		 * Second half of the tlckFREEPAGE handling: mp is still the
		 * metapage captured before the maps were updated above.
		 */
		if (tlck->flag & tlckFREEPAGE) {
			if (!(tblk->flag & tblkGC_LAZY)) {
				/* This is equivalent to txRelease */
				ASSERT(mp->lid == lid);
				tlck->mp->lid = 0;
			}
			/* this tlock must hold the last nohomeok reference */
			assert(atomic_read(&mp->nohomeok) == 1);
			atomic_dec(&mp->nohomeok);
			discard_metapage(mp);
			tlck->mp = 0;
		}
	}
	/*
	 *      update inode allocation map
	 *
	 * update allocation state in pmap and
	 * update lsn of the pmap page;
	 * update in-memory inode flag/state
	 *
	 * unlock mapper/write lock
	 */
	if (tblk->xflag & COMMIT_CREATE) {
		ip = tblk->ip;

		ASSERT(test_cflag(COMMIT_New, ip));
		clear_cflag(COMMIT_New, ip);

		/*
		 * NOTE(review): third argument is FALSE here and TRUE on
		 * the COMMIT_DELETE path, so it presumably means "is free"
		 * - confirm against diUpdatePMap().
		 */
		diUpdatePMap(ipimap, ip->i_ino, FALSE, tblk);
		ipimap->i_state |= I_DIRTY;
		/* update persistent block allocation map
		 * for the allocation of inode extent;
		 */
		/* one-entry on-stack pxd lock describing the inode extent */
		pxdlock.flag = mlckALLOCPXD;
		pxdlock.pxd = JFS_IP(ip)->ixpxd;
		pxdlock.index = 1;
		/* txAllocPMap() only uses its inode argument to reach i_sb */
		txAllocPMap(ip, (struct maplock *) & pxdlock, tblk);
		/* drop the inode reference (presumably taken for tblk->ip
		 * at commit time - confirm against txCommit()) */
		iput(ip);
	} else if (tblk->xflag & COMMIT_DELETE) {
		ip = tblk->ip;
		diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk);
		ipimap->i_state |= I_DIRTY;
		iput(ip);
	}
}
2354 
2355 
2356 /*
2357  *      txAllocPMap()
2358  *
2359  * function: allocate from persistent map;
2360  *
2361  * parameter:
2362  *      ipbmap  -
 *      maplock -
 *              xad list:
 *              pxd:
 *
 *      maptype -
 *              allocate from persistent map;
 *              free from persistent map;
 *              (e.g., tmp file - free from working map at release
 *               of last reference);
2372  *              free from persistent and working map;
2373  *
2374  *      lsn     - log sequence number;
2375  */
txAllocPMap(struct inode * ip,struct maplock * maplock,struct tblock * tblk)2376 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2377 			struct tblock * tblk)
2378 {
2379 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2380 	struct xdlistlock *xadlistlock;
2381 	xad_t *xad;
2382 	s64 xaddr;
2383 	int xlen;
2384 	struct pxd_lock *pxdlock;
2385 	struct xdlistlock *pxdlistlock;
2386 	pxd_t *pxd;
2387 	int n;
2388 
2389 	/*
2390 	 * allocate from persistent map;
2391 	 */
2392 	if (maplock->flag & mlckALLOCXADLIST) {
2393 		xadlistlock = (struct xdlistlock *) maplock;
2394 		xad = xadlistlock->xdlist;
2395 		for (n = 0; n < xadlistlock->count; n++, xad++) {
2396 			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2397 				xaddr = addressXAD(xad);
2398 				xlen = lengthXAD(xad);
2399 				dbUpdatePMap(ipbmap, FALSE, xaddr,
2400 					     (s64) xlen, tblk);
2401 				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2402 				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2403 					 (ulong) xaddr, xlen);
2404 			}
2405 		}
2406 	} else if (maplock->flag & mlckALLOCPXD) {
2407 		pxdlock = (struct pxd_lock *) maplock;
2408 		xaddr = addressPXD(&pxdlock->pxd);
2409 		xlen = lengthPXD(&pxdlock->pxd);
2410 		dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk);
2411 		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2412 	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2413 
2414 		pxdlistlock = (struct xdlistlock *) maplock;
2415 		pxd = pxdlistlock->xdlist;
2416 		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2417 			xaddr = addressPXD(pxd);
2418 			xlen = lengthPXD(pxd);
2419 			dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen,
2420 				     tblk);
2421 			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2422 				 (ulong) xaddr, xlen);
2423 		}
2424 	}
2425 }
2426 
2427 
2428 /*
2429  *      txFreeMap()
2430  *
2431  * function:    free from persistent and/or working map;
2432  *
2433  * todo: optimization
2434  */
txFreeMap(struct inode * ip,struct maplock * maplock,struct tblock * tblk,int maptype)2435 void txFreeMap(struct inode *ip,
2436 	       struct maplock * maplock, struct tblock * tblk, int maptype)
2437 {
2438 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2439 	struct xdlistlock *xadlistlock;
2440 	xad_t *xad;
2441 	s64 xaddr;
2442 	int xlen;
2443 	struct pxd_lock *pxdlock;
2444 	struct xdlistlock *pxdlistlock;
2445 	pxd_t *pxd;
2446 	int n;
2447 
2448 	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2449 		 tblk, maplock, maptype);
2450 
2451 	/*
2452 	 * free from persistent map;
2453 	 */
2454 	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2455 		if (maplock->flag & mlckFREEXADLIST) {
2456 			xadlistlock = (struct xdlistlock *) maplock;
2457 			xad = xadlistlock->xdlist;
2458 			for (n = 0; n < xadlistlock->count; n++, xad++) {
2459 				if (!(xad->flag & XAD_NEW)) {
2460 					xaddr = addressXAD(xad);
2461 					xlen = lengthXAD(xad);
2462 					dbUpdatePMap(ipbmap, TRUE, xaddr,
2463 						     (s64) xlen, tblk);
2464 					jfs_info("freePMap: xaddr:0x%lx "
2465 						 "xlen:%d",
2466 						 (ulong) xaddr, xlen);
2467 				}
2468 			}
2469 		} else if (maplock->flag & mlckFREEPXD) {
2470 			pxdlock = (struct pxd_lock *) maplock;
2471 			xaddr = addressPXD(&pxdlock->pxd);
2472 			xlen = lengthPXD(&pxdlock->pxd);
2473 			dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen,
2474 				     tblk);
2475 			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2476 				 (ulong) xaddr, xlen);
2477 		} else {	/* (maplock->flag & mlckALLOCPXDLIST) */
2478 
2479 			pxdlistlock = (struct xdlistlock *) maplock;
2480 			pxd = pxdlistlock->xdlist;
2481 			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2482 				xaddr = addressPXD(pxd);
2483 				xlen = lengthPXD(pxd);
2484 				dbUpdatePMap(ipbmap, TRUE, xaddr,
2485 					     (s64) xlen, tblk);
2486 				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2487 					 (ulong) xaddr, xlen);
2488 			}
2489 		}
2490 	}
2491 
2492 	/*
2493 	 * free from working map;
2494 	 */
2495 	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2496 		if (maplock->flag & mlckFREEXADLIST) {
2497 			xadlistlock = (struct xdlistlock *) maplock;
2498 			xad = xadlistlock->xdlist;
2499 			for (n = 0; n < xadlistlock->count; n++, xad++) {
2500 				xaddr = addressXAD(xad);
2501 				xlen = lengthXAD(xad);
2502 				dbFree(ip, xaddr, (s64) xlen);
2503 				xad->flag = 0;
2504 				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2505 					 (ulong) xaddr, xlen);
2506 			}
2507 		} else if (maplock->flag & mlckFREEPXD) {
2508 			pxdlock = (struct pxd_lock *) maplock;
2509 			xaddr = addressPXD(&pxdlock->pxd);
2510 			xlen = lengthPXD(&pxdlock->pxd);
2511 			dbFree(ip, xaddr, (s64) xlen);
2512 			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2513 				 (ulong) xaddr, xlen);
2514 		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2515 
2516 			pxdlistlock = (struct xdlistlock *) maplock;
2517 			pxd = pxdlistlock->xdlist;
2518 			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2519 				xaddr = addressPXD(pxd);
2520 				xlen = lengthPXD(pxd);
2521 				dbFree(ip, xaddr, (s64) xlen);
2522 				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2523 					 (ulong) xaddr, xlen);
2524 			}
2525 		}
2526 	}
2527 }
2528 
2529 
2530 /*
2531  *      txFreelock()
2532  *
2533  * function:    remove tlock from inode anonymous locklist
2534  */
txFreelock(struct inode * ip)2535 void txFreelock(struct inode *ip)
2536 {
2537 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2538 	struct tlock *xtlck, *tlck;
2539 	lid_t xlid = 0, lid;
2540 
2541 	if (!jfs_ip->atlhead)
2542 		return;
2543 
2544 	xtlck = (struct tlock *) &jfs_ip->atlhead;
2545 
2546 	while ((lid = xtlck->next)) {
2547 		tlck = lid_to_tlock(lid);
2548 		if (tlck->flag & tlckFREELOCK) {
2549 			xtlck->next = tlck->next;
2550 			txLockFree(lid);
2551 		} else {
2552 			xtlck = tlck;
2553 			xlid = lid;
2554 		}
2555 	}
2556 
2557 	if (jfs_ip->atlhead)
2558 		jfs_ip->atltail = xlid;
2559 	else {
2560 		jfs_ip->atltail = 0;
2561 		/*
2562 		 * If inode was on anon_list, remove it
2563 		 */
2564 		TXN_LOCK();
2565 		list_del_init(&jfs_ip->anon_inode_list);
2566 		TXN_UNLOCK();
2567 	}
2568 }
2569 
2570 
2571 /*
2572  *      txAbort()
2573  *
2574  * function: abort tx before commit;
2575  *
2576  * frees line-locks and segment locks for all
2577  * segments in comdata structure.
2578  * Optionally sets state of file-system to FM_DIRTY in super-block.
 * The log age of in-memory page frames held by the caller
 * is reset to 0 (to avoid log wrap).
2581  */
void txAbort(tid_t tid, int dirty)
{
	/*
	 * Abort a transaction before commit: detach and free every tlock
	 * on the transaction's chain, then empty the chain.  The caller is
	 * responsible for freeing the transaction block itself.  A
	 * non-zero `dirty` additionally reports a filesystem error.
	 */
	struct tblock *tblk = tid_to_tblock(tid);
	lid_t cur, following;

	jfs_warn("txAbort: tid:%d dirty:0x%x", tid, dirty);

	cur = tblk->next;
	while (cur) {
		struct tlock *tlck = lid_to_tlock(cur);
		struct metapage *page = tlck->mp;

		/* fetch the successor before the tlock is freed */
		following = tlck->next;

		JFS_IP(tlck->ip)->xtlid = 0;

		if (page != NULL) {
			page->lid = 0;

			/*
			 * Reset the page's lsn to avoid log wrap: the page
			 * may have been committed by earlier transaction(s)
			 * without having been paged out, so it can be on
			 * the logsync list even though it was never logged
			 * for this transaction.
			 */
			if ((page->xflag & COMMIT_PAGE) && page->lsn)
				LogSyncRelease(page);
		}

		/* return the tlock to the head of the freelist */
		TXN_LOCK();
		txLockFree(cur);
		TXN_UNLOCK();

		cur = following;
	}

	/* the chain is now empty */
	tblk->next = tblk->last = 0;

	/* mark the filesystem dirty when requested */
	if (dirty)
		jfs_error(tblk->sb, "txAbort");
}
2632 
2633 /*
2634  *      txLazyCommit(void)
2635  *
2636  *	All transactions except those changing ipimap (COMMIT_FORCE) are
 *	processed by this routine.  This ensures that the inode and block
2638  *	allocation maps are updated in order.  For synchronous transactions,
2639  *	let the user thread finish processing after txUpdateMap() is called.
2640  */
static void txLazyCommit(struct tblock * tblk)
{
	struct jfs_log *log;

	/*
	 * Wait (by yielding the CPU) until the transaction block is
	 * flagged either tblkGC_READY or tblkGC_UNLOCKED.
	 */
	while (((tblk->flag & tblkGC_READY) == 0) &&
	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
		/* We must have gotten ahead of the user thread
		 */
		jfs_info("txLazyCommit: tblk 0x%p not unlocked", tblk);
		yield();
	}

	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);

	/* apply the transaction's allocation-map updates */
	txUpdateMap(tblk);

	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;

	spin_lock_irq(&log->gclock);	// LOGGC_LOCK

	/* flag the transaction as committed before waking any waiters */
	tblk->flag |= tblkGC_COMMITTED;

	/*
	 * NOTE(review): gcrtc looks like a count of tblkGC_READY
	 * transactions in the group commit - confirm in jfs_logmgr.
	 */
	if (tblk->flag & tblkGC_READY)
		log->gcrtc--;

	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP

	/*
	 * Can't release log->gclock until we've tested tblk->flag
	 */
	if (tblk->flag & tblkGC_LAZY) {
		/*
		 * Lazy transaction: this routine finishes it here by
		 * unlocking its tlocks and ending the transaction.
		 */
		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
		txUnlock(tblk);
		tblk->flag &= ~tblkGC_LAZY;
		txEnd(tblk - TxBlock);	/* Convert back to tid */
	} else
		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK

	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
}
2681 
2682 /*
2683  *      jfs_lazycommit(void)
2684  *
2685  *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
2686  *	context, or where blocking is not wanted, this routine will process
2687  *	committed transactions from the unlock queue.
2688  */
int jfs_lazycommit(void *arg)
{
	int WorkDone;
	struct tblock *tblk;
	unsigned long flags;

	/*
	 * Kernel-thread setup: detach from the creating process, drop
	 * the tty and set the task name, all under the big kernel lock.
	 */
	lock_kernel();

	daemonize();
	current->tty = NULL;
	strcpy(current->comm, "jfsCommit");

	unlock_kernel();

	/* publish this task as the commit thread */
	jfsCommitTask = current;

	/* block every signal for this thread */
	spin_lock_irq(&current->sigmask_lock);
	sigfillset(&current->blocked);
	recalc_sigpending(current);
	spin_unlock_irq(&current->sigmask_lock);

	/* start with an empty unlock queue */
	LAZY_LOCK_INIT();
	TxAnchor.unlock_queue = TxAnchor.unlock_tail = 0;

	/* signal jfsIOwait now that initialization is complete */
	complete(&jfsIOwait);

	do {
		DECLARE_WAITQUEUE(wq, current);

		LAZY_LOCK(flags);
restart:
		WorkDone = 0;
		while ((tblk = TxAnchor.unlock_queue)) {
			/*
			 * We can't get ahead of user thread.  Spinning is
			 * simpler than blocking/waking.  We shouldn't spin
			 * very long, since user thread shouldn't be blocking
			 * between lmGroupCommit & txEnd.
			 */
			WorkDone = 1;

			/*
			 * Remove first transaction from queue
			 */
			TxAnchor.unlock_queue = tblk->cqnext;
			tblk->cqnext = 0;
			if (TxAnchor.unlock_tail == tblk)
				TxAnchor.unlock_tail = 0;

			/* process the transaction without holding the lock */
			LAZY_UNLOCK(flags);
			txLazyCommit(tblk);

			/*
			 * We can be running indefinitely if other processors
			 * are adding transactions to this list
			 */
			cond_resched();
			LAZY_LOCK(flags);
		}

		/*
		 * If anything was processed, rescan the queue once more
		 * before going to sleep.
		 */
		if (WorkDone)
			goto restart;

		/*
		 * Queue ourselves on the wait queue and set the sleeping
		 * state while still holding the lock, so a wakeup issued
		 * after the unlock is not lost.
		 */
		add_wait_queue(&jfs_commit_thread_wait, &wq);
		set_current_state(TASK_INTERRUPTIBLE);
		LAZY_UNLOCK(flags);
		schedule();
		current->state = TASK_RUNNING;
		remove_wait_queue(&jfs_commit_thread_wait, &wq);
	} while (!jfs_stop_threads);

	/* shutting down: report whether work was left behind */
	if (TxAnchor.unlock_queue)
		jfs_err("jfs_lazycommit being killed w/pending transactions!");
	else
		jfs_info("jfs_lazycommit being killed\n");
	complete_and_exit(&jfsIOwait, 0);
}
2766 
txLazyUnlock(struct tblock * tblk)2767 void txLazyUnlock(struct tblock * tblk)
2768 {
2769 	unsigned long flags;
2770 
2771 	LAZY_LOCK(flags);
2772 
2773 	if (TxAnchor.unlock_tail)
2774 		TxAnchor.unlock_tail->cqnext = tblk;
2775 	else
2776 		TxAnchor.unlock_queue = tblk;
2777 	TxAnchor.unlock_tail = tblk;
2778 	tblk->cqnext = 0;
2779 	LAZY_UNLOCK(flags);
2780 	wake_up(&jfs_commit_thread_wait);
2781 }
2782 
LogSyncRelease(struct metapage * mp)2783 static void LogSyncRelease(struct metapage * mp)
2784 {
2785 	struct jfs_log *log = mp->log;
2786 
2787 	assert(atomic_read(&mp->nohomeok));
2788 	assert(log);
2789 	atomic_dec(&mp->nohomeok);
2790 
2791 	if (atomic_read(&mp->nohomeok))
2792 		return;
2793 
2794 	hold_metapage(mp, 0);
2795 
2796 	LOGSYNC_LOCK(log);
2797 	mp->log = NULL;
2798 	mp->lsn = 0;
2799 	mp->clsn = 0;
2800 	log->count--;
2801 	list_del_init(&mp->synclist);
2802 	LOGSYNC_UNLOCK(log);
2803 
2804 	release_metapage(mp);
2805 }
2806 
2807 /*
2808  *	txQuiesce
2809  *
2810  *	Block all new transactions and push anonymous transactions to
2811  *	completion
2812  *
2813  *	This does almost the same thing as jfs_sync below.  We don't
2814  *	worry about deadlocking when TlocksLow is set, since we would
2815  *	expect jfs_sync to get us out of that jam.
2816  */
txQuiesce(struct super_block * sb)2817 void txQuiesce(struct super_block *sb)
2818 {
2819 	struct inode *ip;
2820 	struct jfs_inode_info *jfs_ip;
2821 	struct jfs_log *log = JFS_SBI(sb)->log;
2822 	tid_t tid;
2823 
2824 	set_bit(log_QUIESCE, &log->flag);
2825 
2826 	TXN_LOCK();
2827 restart:
2828 	while (!list_empty(&TxAnchor.anon_list)) {
2829 		jfs_ip = list_entry(TxAnchor.anon_list.next,
2830 				    struct jfs_inode_info,
2831 				    anon_inode_list);
2832 		ip = jfs_ip->inode;
2833 
2834 		/*
2835 		 * inode will be removed from anonymous list
2836 		 * when it is committed
2837 		 */
2838 		TXN_UNLOCK();
2839 		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2840 		down(&jfs_ip->commit_sem);
2841 		txCommit(tid, 1, &ip, 0);
2842 		txEnd(tid);
2843 		up(&jfs_ip->commit_sem);
2844 		/*
2845 		 * Just to be safe.  I don't know how
2846 		 * long we can run without blocking
2847 		 */
2848 		cond_resched();
2849 		TXN_LOCK();
2850 	}
2851 
2852 	/*
2853 	 * If jfs_sync is running in parallel, there could be some inodes
2854 	 * on anon_list2.  Let's check.
2855 	 */
2856 	if (!list_empty(&TxAnchor.anon_list2)) {
2857 		list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2858 		INIT_LIST_HEAD(&TxAnchor.anon_list2);
2859 		goto restart;
2860 	}
2861 	TXN_UNLOCK();
2862 
2863 	/*
2864 	 * We may need to kick off the group commit
2865 	 */
2866 	jfs_flush_journal(log, 0);
2867 }
2868 
2869 /*
2870  * txResume()
2871  *
2872  * Allows transactions to start again following txQuiesce
2873  */
txResume(struct super_block * sb)2874 void txResume(struct super_block *sb)
2875 {
2876 	struct jfs_log *log = JFS_SBI(sb)->log;
2877 
2878 	clear_bit(log_QUIESCE, &log->flag);
2879 	TXN_WAKEUP(&log->syncwait);
2880 }
2881 
2882 /*
2883  *      jfs_sync(void)
2884  *
2885  *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2886  *	We write any inodes that have anonymous tlocks so they will become
2887  *	available.
2888  */
int jfs_sync(void *arg)
{
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	int rc;
	tid_t tid;

	/*
	 * Kernel-thread setup: detach from the creating process, drop
	 * the tty and set the task name, all under the big kernel lock.
	 */
	lock_kernel();

	daemonize();
	current->tty = NULL;
	strcpy(current->comm, "jfsSync");

	unlock_kernel();

	/* block every signal for this thread */
	spin_lock_irq(&current->sigmask_lock);
	sigfillset(&current->blocked);
	recalc_sigpending(current);
	spin_unlock_irq(&current->sigmask_lock);

	/* signal jfsIOwait now that initialization is complete */
	complete(&jfsIOwait);

	do {
		DECLARE_WAITQUEUE(wq, current);
		/*
		 * write each inode on the anonymous inode list
		 */
		TXN_LOCK();
		/* keep going only while tlocks are still flagged as low */
		while (TxAnchor.TlocksLow && !list_empty(&TxAnchor.anon_list)) {
			jfs_ip = list_entry(TxAnchor.anon_list.next,
					    struct jfs_inode_info,
					    anon_inode_list);
			ip = jfs_ip->inode;

			if (! igrab(ip)) {
				/*
				 * Inode is being freed
				 */
				list_del_init(&jfs_ip->anon_inode_list);
			} else if (! down_trylock(&jfs_ip->commit_sem)) {
				/*
				 * inode will be removed from anonymous list
				 * when it is committed
				 */
				TXN_UNLOCK();
				tid = txBegin(ip->i_sb, COMMIT_INODE);
				/* NOTE(review): rc is stored but never
				 * examined in this function */
				rc = txCommit(tid, 1, &ip, 0);
				txEnd(tid);
				up(&jfs_ip->commit_sem);

				/* drop the reference taken by igrab() */
				iput(ip);
				/*
				 * Just to be safe.  I don't know how
				 * long we can run without blocking
				 */
				cond_resched();
				TXN_LOCK();
			} else {
				/* We can't get the commit semaphore.  It may
				 * be held by a thread waiting for tlock's
				 * so let's not block here.  Save it to
				 * put back on the anon_list.
				 */

				/* Take off anon_list */
				list_del(&jfs_ip->anon_inode_list);

				/* Put on anon_list2 */
				list_add(&jfs_ip->anon_inode_list,
					 &TxAnchor.anon_list2);

				/* drop the igrab() reference with the
				 * transaction lock released */
				TXN_UNLOCK();
				iput(ip);
				TXN_LOCK();
			}
		}
		/* Add anon_list2 back to anon_list */
		if (!list_empty(&TxAnchor.anon_list2)) {
			list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
			INIT_LIST_HEAD(&TxAnchor.anon_list2);
		}
		/*
		 * Queue ourselves on the wait queue and set the sleeping
		 * state while still holding TXN_LOCK, then release the
		 * lock and sleep until woken.
		 */
		add_wait_queue(&jfs_sync_thread_wait, &wq);
		set_current_state(TASK_INTERRUPTIBLE);
		TXN_UNLOCK();
		schedule();
		current->state = TASK_RUNNING;
		remove_wait_queue(&jfs_sync_thread_wait, &wq);
	} while (!jfs_stop_threads);

	jfs_info("jfs_sync being killed");
	complete_and_exit(&jfsIOwait, 0);
}
2981 
2982 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
		      int *eof, void *data)
{
	/*
	 * /proc read handler: format a snapshot of TxAnchor into buffer,
	 * then report the window [offset, offset + length) of that text
	 * through *start and the return value.  *eof is set when the
	 * window reaches the end of the text.
	 */
	char *freewait =
	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
	char *freelockwait =
	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
	char *lowlockwait =
	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
	int len;

	len = sprintf(buffer,
		      "JFS TxAnchor\n"
		      "============\n"
		      "freetid = %d\n"
		      "freewait = %s\n"
		      "freelock = %d\n"
		      "freelockwait = %s\n"
		      "lowlockwait = %s\n"
		      "tlocksInUse = %d\n"
		      "TlocksLow = %d\n"
		      "unlock_queue = 0x%p\n"
		      "unlock_tail = 0x%p\n",
		      TxAnchor.freetid,
		      freewait,
		      TxAnchor.freelock,
		      freelockwait,
		      lowlockwait,
		      TxAnchor.tlocksInUse,
		      TxAnchor.TlocksLow,
		      TxAnchor.unlock_queue,
		      TxAnchor.unlock_tail);

	/* skip the part the reader has already consumed */
	*start = buffer + offset;
	len -= offset;

	/* clamp to the caller's buffer, or flag end-of-file */
	if (len <= length)
		*eof = 1;
	else
		len = length;

	/* an offset beyond the text yields no data */
	return (len < 0) ? 0 : len;
}
3035 #endif
3036 
3037 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
int jfs_txstats_read(char *buffer, char **start, off_t offset, int length,
		     int *eof, void *data)
{
	/*
	 * /proc read handler: format the TxStat counters into buffer,
	 * then report the window [offset, offset + length) of that text
	 * through *start and the return value.  *eof is set when the
	 * window reaches the end of the text.
	 */
	int len;

	len = sprintf(buffer,
		      "JFS TxStats\n"
		      "===========\n"
		      "calls to txBegin = %d\n"
		      "txBegin blocked by sync barrier = %d\n"
		      "txBegin blocked by tlocks low = %d\n"
		      "txBegin blocked by no free tid = %d\n"
		      "calls to txBeginAnon = %d\n"
		      "txBeginAnon blocked by sync barrier = %d\n"
		      "txBeginAnon blocked by tlocks low = %d\n"
		      "calls to txLockAlloc = %d\n"
		      "tLockAlloc blocked by no free lock = %d\n",
		      TxStat.txBegin,
		      TxStat.txBegin_barrier,
		      TxStat.txBegin_lockslow,
		      TxStat.txBegin_freetid,
		      TxStat.txBeginAnon,
		      TxStat.txBeginAnon_barrier,
		      TxStat.txBeginAnon_lockslow,
		      TxStat.txLockAlloc,
		      TxStat.txLockAlloc_freelock);

	/* skip the part the reader has already consumed */
	*start = buffer + offset;
	len -= offset;

	/* clamp to the caller's buffer, or flag end-of-file */
	if (len <= length)
		*eof = 1;
	else
		len = length;

	/* an offset beyond the text yields no data */
	return (len < 0) ? 0 : len;
}
3080 #endif
3081