1 /*
2 * Copyright (c) International Business Machines Corp., 2000-2003
3 * Portions Copyright (c) Christoph Hellwig, 2001-2002
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 /*
21 * jfs_logmgr.c: log manager
22 *
23 * for related information, see transaction manager (jfs_txnmgr.c), and
24 * recovery manager (jfs_logredo.c).
25 *
26 * note: for detail, RTFS.
27 *
28 * log buffer manager:
29 * special purpose buffer manager supporting log i/o requirements.
30 * per log serial pageout of logpage
31 * queuing i/o requests and redrive i/o at iodone
32 * maintain current logpage buffer
33 * no caching since append only
34 * appropriate jfs buffer cache buffers as needed
35 *
36 * group commit:
37 * transactions which wrote COMMIT records in the same in-memory
38 * log page during the pageout of previous/current log page(s) are
39 * committed together by the pageout of the page.
40 *
41 * TBD lazy commit:
 * transactions are committed asynchronously when the log page
 * containing its COMMIT record is paged out when it becomes full;
44 *
45 * serialization:
46 * . a per log lock serialize log write.
47 * . a per log lock serialize group commit.
48 * . a per log lock serialize log open/close;
49 *
50 * TBD log integrity:
51 * careful-write (ping-pong) of last logpage to recover from crash
52 * in overwrite.
53 * detection of split (out-of-order) write of physical sectors
54 * of last logpage via timestamp at end of each sector
55 * with its mirror data array at trailer).
56 *
57 * alternatives:
58 * lsn - 64-bit monotonically increasing integer vs
59 * 32-bit lspn and page eor.
60 */
61
62 #include <linux/fs.h>
63 #include <linux/locks.h>
64 #include <linux/blkdev.h>
65 #include <linux/interrupt.h>
66 #include <linux/smp_lock.h>
67 #include <linux/completion.h>
68 #include "jfs_incore.h"
69 #include "jfs_filsys.h"
70 #include "jfs_metapage.h"
71 #include "jfs_txnmgr.h"
72 #include "jfs_debug.h"
73
74
75 /*
76 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
77 */
78 static struct lbuf *log_redrive_list;
79 static spinlock_t log_redrive_lock = SPIN_LOCK_UNLOCKED;
80 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
81
82
83 /*
84 * log read/write serialization (per log)
85 */
86 #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock)
87 #define LOG_LOCK(log) down(&((log)->loglock))
88 #define LOG_UNLOCK(log) up(&((log)->loglock))
89
90
91 /*
92 * log group commit serialization (per log)
93 */
94
95 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
96 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
97 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
98 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)
99
100 /*
101 * log sync serialization (per log)
102 */
103 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
104 #define LOGSYNC_BARRIER(logsize) ((logsize)/4)
105 /*
106 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
107 #define LOGSYNC_BARRIER(logsize) ((logsize)/2)
108 */
109
110
111 /*
112 * log buffer cache synchronization
113 */
114 static spinlock_t jfsLCacheLock = SPIN_LOCK_UNLOCKED;
115
116 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
117 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)
118
119 /*
120 * See __SLEEP_COND in jfs_locks.h
121 */
122 #define LCACHE_SLEEP_COND(wq, cond, flags) \
123 do { \
124 if (cond) \
125 break; \
126 __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
127 } while (0)
128
129 #define LCACHE_WAKEUP(event) wake_up(event)
130
131
132 /*
133 * lbuf buffer cache (lCache) control
134 */
135 /* log buffer manager pageout control (cumulative, inclusive) */
136 #define lbmREAD 0x0001
137 #define lbmWRITE 0x0002 /* enqueue at tail of write queue;
138 * init pageout if at head of queue;
139 */
140 #define lbmRELEASE 0x0004 /* remove from write queue
141 * at completion of pageout;
142 * do not free/recycle it yet:
143 * caller will free it;
144 */
145 #define lbmSYNC 0x0008 /* do not return to freelist
146 * when removed from write queue;
147 */
148 #define lbmFREE 0x0010 /* return to freelist
149 * at completion of pageout;
150 * the buffer may be recycled;
151 */
152 #define lbmDONE 0x0020
153 #define lbmERROR 0x0040
154 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
155 * of log page
156 */
157 #define lbmDIRECT 0x0100
158
159 /*
160 * external references
161 */
162 extern void txLazyUnlock(struct tblock * tblk);
163 extern int jfs_stop_threads;
164 extern struct completion jfsIOwait;
165
166 /*
167 * forward references
168 */
169 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
170 struct lrd * lrd, struct tlock * tlck);
171
172 static int lmNextPage(struct jfs_log * log);
173 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate);
174
175 static int lbmLogInit(struct jfs_log * log);
176 static void lbmLogShutdown(struct jfs_log * log);
177 static struct lbuf *lbmAllocate(struct jfs_log * log, int);
178 static void lbmFree(struct lbuf * bp);
179 static void lbmfree(struct lbuf * bp);
180 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
181 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
182 int cant_block);
183 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
184 static int lbmIOWait(struct lbuf * bp, int flag);
185 static void lbmIODone(struct buffer_head *bh, int);
186 static void lbmStartIO(struct lbuf * bp);
187 static void lmGCwrite(struct jfs_log * log, int cant_block);
188 static int lmLogSync(struct jfs_log * log, int nosyncwait);
189
190
191 /*
192 * statistics
193 */
194 #ifdef CONFIG_JFS_STATISTICS
195 struct lmStat {
196 uint commit; /* # of commit */
197 uint pagedone; /* # of page written */
198 uint submitted; /* # of pages submitted */
199 uint full_page; /* # of full pages submitted */
200 uint partial_page; /* # of partial pages submitted */
201 } lmStat;
202 #endif
203
204
205 /*
206 * NAME: lmLog()
207 *
208 * FUNCTION: write a log record;
209 *
210 * PARAMETER:
211 *
212 * RETURN: lsn - offset to the next log record to write (end-of-log);
213 * -1 - error;
214 *
215 * note: todo: log error handler
216 */
int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	  struct tlock * tlck)
{
	int lsn;
	int diffp, difft;	/* log-relative distances of page/tblk lsn */
	struct metapage *mp = NULL;

	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
		 log, tblk, lrd, tlck);

	/* per-log serialization of log writes */
	LOG_LOCK(log);

	/* log by (out-of-transaction) JFS ? no recovery lsn to maintain */
	if (tblk == NULL)
		goto writeRecord;

	/* log from page ? btree-root tlocks and tlocks without a
	 * metapage carry no page lsn to maintain
	 */
	if (tlck == NULL ||
	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
		goto writeRecord;

	/*
	 * initialize/update page/transaction recovery lsn
	 */
	lsn = log->lsn;

	LOGSYNC_LOCK(log);

	/*
	 * initialize page lsn if first log write of the page
	 */
	if (mp->lsn == 0) {
		mp->log = log;
		mp->lsn = lsn;
		log->count++;

		/* insert page at tail of logsynclist */
		list_add_tail(&mp->synclist, &log->synclist);
	}

	/*
	 * initialize/update lsn of tblock of the page
	 *
	 * transaction inherits oldest lsn of pages associated
	 * with allocation/deallocation of resources (their
	 * log records are used to reconstruct allocation map
	 * at recovery time: inode for inode allocation map,
	 * B+-tree index of extent descriptors for block
	 * allocation map);
	 * allocation map pages inherit transaction lsn at
	 * commit time to allow forwarding log syncpt past log
	 * records associated with allocation/deallocation of
	 * resources only after persistent map of these map pages
	 * have been updated and propagated to home.
	 */
	/*
	 * initialize transaction lsn:
	 */
	if (tblk->lsn == 0) {
		/* inherit lsn of its first page logged */
		tblk->lsn = mp->lsn;
		log->count++;

		/* insert tblock after the page on logsynclist */
		list_add(&tblk->synclist, &mp->synclist);
	}
	/*
	 * update transaction lsn:
	 */
	else {
		/* inherit oldest/smallest lsn of page */
		logdiff(diffp, mp->lsn, log);
		logdiff(difft, tblk->lsn, log);
		if (diffp < difft) {
			/* update tblock lsn with page lsn */
			tblk->lsn = mp->lsn;

			/* move tblock after page on logsynclist */
			list_del(&tblk->synclist);
			list_add(&tblk->synclist, &mp->synclist);
		}
	}

	LOGSYNC_UNLOCK(log);

	/*
	 * write the log record
	 */
      writeRecord:
	lsn = lmWriteRecord(log, tblk, lrd, tlck);

	/*
	 * forward log syncpt if log reached next syncpt trigger
	 */
	logdiff(diffp, lsn, log);
	if (diffp >= log->nextsync)
		lsn = lmLogSync(log, 0);

	/* update end-of-log lsn */
	log->lsn = lsn;

	LOG_UNLOCK(log);

	/* return end-of-log address */
	return lsn;
}
323
324
325 /*
326 * NAME: lmWriteRecord()
327 *
328 * FUNCTION: move the log record to current log page
329 *
330 * PARAMETER: cd - commit descriptor
331 *
332 * RETURN: end-of-log address
333 *
334 * serialization: LOG_LOCK() held on entry/exit
335 */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	      struct tlock * tlck)
{
	int lsn = 0;		/* end-of-log address */
	struct lbuf *bp;	/* dst log page buffer */
	struct logpage *lp;	/* dst log page */
	caddr_t dst;		/* destination address in log page */
	int dstoffset;		/* end-of-log offset in log page */
	int freespace;		/* free space in log page */
	caddr_t p;		/* src meta-data page */
	caddr_t src;
	int srclen;
	int nbytes;		/* number of bytes to move */
	int i;
	int len;		/* total log-vector data bytes moved */
	struct linelock *linelock;
	struct lv *lv;
	struct lvd *lvd;
	int l2linesize;

	len = 0;

	/* retrieve destination log page to write */
	bp = (struct lbuf *) log->bp;
	lp = (struct logpage *) bp->l_ldata;
	dstoffset = log->eor;

	/* any log data to write ? */
	if (tlck == NULL)
		goto moveLrd;

	/*
	 * move log record data
	 */
	/* retrieve source meta-data page to log */
	if (tlck->flag & tlckPAGELOCK) {
		p = (caddr_t) (tlck->mp->data);
		linelock = (struct linelock *) & tlck->lock;
	}
	/* retrieve source in-memory inode to log */
	else if (tlck->flag & tlckINODELOCK) {
		if (tlck->type & tlckDTREE)
			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
		else
			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
		linelock = (struct linelock *) & tlck->lock;
	}
#ifdef	_JFS_WIP
	else if (tlck->flag & tlckINLINELOCK) {

		inlinelock = (struct inlinelock *) & tlck;
		p = (caddr_t) & inlinelock->pxd;
		linelock = (struct linelock *) & tlck;
	}
#endif				/* _JFS_WIP */
	else {
		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
		return 0;	/* Probably should trap */
	}
	l2linesize = linelock->l2linesize;

      moveData:
	ASSERT(linelock->index <= linelock->maxcnt);

	lv = linelock->lv;
	for (i = 0; i < linelock->index; i++, lv++) {
		if (lv->length == 0)
			continue;

		/* is page full ? */
		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
			/* page become full: move on to next page */
			lmNextPage(log);

			bp = log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;
		}

		/*
		 * move log vector data, spilling onto further log
		 * pages whenever the current page fills up
		 */
		src = (u8 *) p + (lv->offset << l2linesize);
		srclen = lv->length << l2linesize;
		len += srclen;
		while (srclen > 0) {
			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
			nbytes = min(freespace, srclen);
			dst = (caddr_t) lp + dstoffset;
			memcpy(dst, src, nbytes);
			dstoffset += nbytes;

			/* is page not full ? */
			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
				break;

			/* page become full: move on to next page */
			lmNextPage(log);

			bp = (struct lbuf *) log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;

			srclen -= nbytes;
			src += nbytes;
		}

		/*
		 * move log vector descriptor (4 bytes: offset + length)
		 */
		len += 4;
		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
		lvd->offset = cpu_to_le16(lv->offset);
		lvd->length = cpu_to_le16(lv->length);
		dstoffset += 4;
		jfs_info("lmWriteRecord: lv offset:%d length:%d",
			 lv->offset, lv->length);
	}

	/* chained linelock: continue with the next set of log vectors */
	if ((i = linelock->next)) {
		linelock = (struct linelock *) lid_to_tlock(i);
		goto moveData;
	}

	/*
	 * move log record descriptor
	 */
      moveLrd:
	lrd->length = cpu_to_le16(len);

	src = (caddr_t) lrd;
	srclen = LOGRDSIZE;

	while (srclen > 0) {
		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
		nbytes = min(freespace, srclen);
		dst = (caddr_t) lp + dstoffset;
		memcpy(dst, src, nbytes);

		dstoffset += nbytes;
		srclen -= nbytes;

		/* are there more to move than freespace of page ? */
		if (srclen)
			goto pageFull;

		/*
		 * end of log record descriptor
		 */

		/* update last log record eor */
		log->eor = dstoffset;
		bp->l_eor = dstoffset;
		lsn = (log->page << L2LOGPSIZE) + dstoffset;

		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
			tblk->clsn = lsn;
			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
				 bp->l_eor);

			INCREMENT(lmStat.commit);	/* # of commit */

			/*
			 * enqueue tblock for group commit:
			 *
			 * enqueue tblock of non-trivial/synchronous COMMIT
			 * at tail of group commit queue
			 * (trivial/asynchronous COMMITs are ignored by
			 * group commit.)
			 */
			LOGGC_LOCK(log);

			/* init tblock gc state */
			tblk->flag = tblkGC_QUEUE;
			tblk->bp = log->bp;
			tblk->pn = log->page;
			tblk->eor = log->eor;

			/* enqueue transaction to commit queue */
			tblk->cqnext = NULL;
			if (log->cqueue.head) {
				log->cqueue.tail->cqnext = tblk;
				log->cqueue.tail = tblk;
			} else
				log->cqueue.head = log->cqueue.tail = tblk;

			LOGGC_UNLOCK(log);
		}

		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
			 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

		/* page not full ? */
		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
			return lsn;

	      pageFull:
		/* page become full: move on to next page */
		lmNextPage(log);

		bp = (struct lbuf *) log->bp;
		lp = (struct logpage *) bp->l_ldata;
		dstoffset = LOGPHDRSIZE;
		src += nbytes;
	}

	return lsn;
}
545
546
547 /*
548 * NAME: lmNextPage()
549 *
550 * FUNCTION: write current page and allocate next page.
551 *
552 * PARAMETER: log
553 *
554 * RETURN: 0
555 *
556 * serialization: LOG_LOCK() held on entry/exit
557 */
static int lmNextPage(struct jfs_log * log)
{
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	int pn;			/* current page number */
	struct lbuf *bp;
	struct lbuf *nextbp;
	struct tblock *tblk;

	/* get current log page number and log sequence page number */
	pn = log->page;
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lspn = le32_to_cpu(lp->h.page);

	LOGGC_LOCK(log);

	/*
	 * write or queue the full page at the tail of write queue
	 */
	/* get the tail tblk on commit queue */
	tblk = log->cqueue.tail;

	/* every tblk who has COMMIT record on the current page,
	 * and has not been committed, must be on commit queue
	 * since tblk is queued at commit queue at the time
	 * of writing its COMMIT record on the page before
	 * page becomes full (even though the tblk thread
	 * who wrote COMMIT record may have been suspended
	 * currently);
	 */

	/* is page bound with outstanding tail tblk ? */
	if (tblk && tblk->pn == pn) {
		/* mark tblk for end-of-page */
		tblk->flag |= tblkGC_EOP;

		if (log->cflag & logGC_PAGEOUT) {
			/* if page is not already on write queue,
			 * just enqueue (no lbmWRITE to prevent redrive)
			 * buffer to wqueue to ensure correct serial order
			 * of the pages since log pages will be added
			 * continuously
			 */
			if (bp->l_wqnext == NULL)
				lbmWrite(log, bp, 0, 0);
		} else {
			/*
			 * No current GC leader, initiate group commit
			 */
			log->cflag |= logGC_PAGEOUT;
			lmGCwrite(log, 0);
		}
	}
	/* page is not bound with outstanding tblk:
	 * init write or mark it to be redriven (lbmWRITE)
	 */
	else {
		/* finalize the page */
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
	}
	LOGGC_UNLOCK(log);

	/*
	 * allocate/initialize next page
	 */
	/* if log wraps, the first data page of log is 2
	 * (0 never used, 1 is superblock).
	 */
	log->page = (pn == log->size - 1) ? 2 : pn + 1;
	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */

	/* allocate/initialize next log page buffer */
	nextbp = lbmAllocate(log, log->page);
	nextbp->l_eor = log->eor;
	log->bp = nextbp;

	/* initialize next log page: sequence number continues from
	 * the page just written out
	 */
	lp = (struct logpage *) nextbp->l_ldata;
	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

	return 0;
}
644
645
646 /*
647 * NAME: lmGroupCommit()
648 *
649 * FUNCTION: group commit
650 * initiate pageout of the pages with COMMIT in the order of
651 * page number - redrive pageout of the page at the head of
652 * pageout queue until full page has been written.
653 *
654 * RETURN:
655 *
656 * NOTE:
657 * LOGGC_LOCK serializes log group commit queue, and
658 * transaction blocks on the commit queue.
659 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
660 */
int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
{
	int rc = 0;

	LOGGC_LOCK(log);

	/* group committed already ? */
	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}
	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);

	if (tblk->xflag & COMMIT_LAZY)
		tblk->flag |= tblkGC_LAZY;

	/* become group leader if no pageout is in progress and either
	 * this is a synchronous commit or the log is being flushed
	 */
	if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head &&
	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag))) {
		/*
		 * No pageout in progress
		 *
		 * start group commit as its group leader.
		 */
		log->cflag |= logGC_PAGEOUT;

		lmGCwrite(log, 0);
	}

	if (tblk->xflag & COMMIT_LAZY) {
		/*
		 * Lazy transactions can leave now
		 */
		LOGGC_UNLOCK(log);
		return 0;
	}

	/* lmGCwrite gives up LOGGC_LOCK, check again */

	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}

	/* upcount transaction waiting for completion
	 */
	log->gcrtc++;
	tblk->flag |= tblkGC_READY;

	/* sleep until the group leader marks us COMMITTED; the helper
	 * releases/reacquires LOGGC_LOCK around the wait
	 */
	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));

	/* removed from commit queue */
	if (tblk->flag & tblkGC_ERROR)
		rc = -EIO;

	LOGGC_UNLOCK(log);
	return rc;
}
725
726 /*
727 * NAME: lmGCwrite()
728 *
729 * FUNCTION: group commit write
730 * initiate write of log page, building a group of all transactions
731 * with commit records on that page.
732 *
733 * RETURN: None
734 *
735 * NOTE:
736 * LOGGC_LOCK must be held by caller.
737 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
738 */
lmGCwrite(struct jfs_log * log,int cant_write)739 static void lmGCwrite(struct jfs_log * log, int cant_write)
740 {
741 struct lbuf *bp;
742 struct logpage *lp;
743 int gcpn; /* group commit page number */
744 struct tblock *tblk;
745 struct tblock *xtblk;
746
747 /*
748 * build the commit group of a log page
749 *
750 * scan commit queue and make a commit group of all
751 * transactions with COMMIT records on the same log page.
752 */
753 /* get the head tblk on the commit queue */
754 tblk = xtblk = log->cqueue.head;
755 gcpn = tblk->pn;
756
757 while (tblk && tblk->pn == gcpn) {
758 xtblk = tblk;
759
760 /* state transition: (QUEUE, READY) -> COMMIT */
761 tblk->flag |= tblkGC_COMMIT;
762 tblk = tblk->cqnext;
763 }
764 tblk = xtblk; /* last tblk of the page */
765
766 /*
767 * pageout to commit transactions on the log page.
768 */
769 bp = (struct lbuf *) tblk->bp;
770 lp = (struct logpage *) bp->l_ldata;
771 /* is page already full ? */
772 if (tblk->flag & tblkGC_EOP) {
773 /* mark page to free at end of group commit of the page */
774 tblk->flag &= ~tblkGC_EOP;
775 tblk->flag |= tblkGC_FREE;
776 bp->l_ceor = bp->l_eor;
777 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
778 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
779 cant_write);
780 INCREMENT(lmStat.full_page);
781 }
782 /* page is not yet full */
783 else {
784 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
785 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
786 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
787 INCREMENT(lmStat.partial_page);
788 }
789 }
790
791 /*
792 * NAME: lmPostGC()
793 *
794 * FUNCTION: group commit post-processing
795 * Processes transactions after their commit records have been written
796 * to disk, redriving log I/O if necessary.
797 *
798 * RETURN: None
799 *
800 * NOTE:
 * This routine is called at interrupt time by lbmIODone
802 */
lmPostGC(struct lbuf * bp)803 static void lmPostGC(struct lbuf * bp)
804 {
805 unsigned long flags;
806 struct jfs_log *log = bp->l_log;
807 struct logpage *lp;
808 struct tblock *tblk;
809
810 //LOGGC_LOCK(log);
811 spin_lock_irqsave(&log->gclock, flags);
812 /*
813 * current pageout of group commit completed.
814 *
815 * remove/wakeup transactions from commit queue who were
816 * group committed with the current log page
817 */
818 while ((tblk = log->cqueue.head) && (tblk->flag & tblkGC_COMMIT)) {
819 /* if transaction was marked GC_COMMIT then
820 * it has been shipped in the current pageout
821 * and made it to disk - it is committed.
822 */
823
824 if (bp->l_flag & lbmERROR)
825 tblk->flag |= tblkGC_ERROR;
826
827 /* remove it from the commit queue */
828 log->cqueue.head = tblk->cqnext;
829 if (log->cqueue.head == NULL)
830 log->cqueue.tail = NULL;
831 tblk->flag &= ~tblkGC_QUEUE;
832 tblk->cqnext = 0;
833
834 if (tblk == log->flush_tblk) {
835 /* we can stop flushing the log now */
836 clear_bit(log_FLUSH, &log->flag);
837 log->flush_tblk = NULL;
838 }
839
840 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
841 tblk->flag);
842
843 if (!(tblk->xflag & COMMIT_FORCE))
844 /*
845 * Hand tblk over to lazy commit thread
846 */
847 txLazyUnlock(tblk);
848 else {
849 /* state transition: COMMIT -> COMMITTED */
850 tblk->flag |= tblkGC_COMMITTED;
851
852 if (tblk->flag & tblkGC_READY)
853 log->gcrtc--;
854
855 LOGGC_WAKEUP(tblk);
856 }
857
858 /* was page full before pageout ?
859 * (and this is the last tblk bound with the page)
860 */
861 if (tblk->flag & tblkGC_FREE)
862 lbmFree(bp);
863 /* did page become full after pageout ?
864 * (and this is the last tblk bound with the page)
865 */
866 else if (tblk->flag & tblkGC_EOP) {
867 /* finalize the page */
868 lp = (struct logpage *) bp->l_ldata;
869 bp->l_ceor = bp->l_eor;
870 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
871 jfs_info("lmPostGC: calling lbmWrite");
872 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
873 1);
874 }
875
876 }
877
878 /* are there any transactions who have entered lnGroupCommit()
879 * (whose COMMITs are after that of the last log page written.
880 * They are waiting for new group commit (above at (SLEEP 1))
881 * or lazy transactions are on a full (queued) log page,
882 * select the latest ready transaction as new group leader and
883 * wake her up to lead her group.
884 */
885 if ((tblk = log->cqueue.head) &&
886 ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
887 test_bit(log_FLUSH, &log->flag)))
888 /*
889 * Call lmGCwrite with new group leader
890 */
891 lmGCwrite(log, 1);
892
893 /* no transaction are ready yet (transactions are only just
894 * queued (GC_QUEUE) and not entered for group commit yet).
895 * the first transaction entering group commit
896 * will elect herself as new group leader.
897 */
898 else
899 log->cflag &= ~logGC_PAGEOUT;
900
901 //LOGGC_UNLOCK(log);
902 spin_unlock_irqrestore(&log->gclock, flags);
903 return;
904 }
905
906 /*
907 * NAME: lmLogSync()
908 *
909 * FUNCTION: write log SYNCPT record for specified log
910 * if new sync address is available
911 * (normally the case if sync() is executed by back-ground
912 * process).
913 * if not, explicitly run jfs_blogsync() to initiate
914 * getting of new sync address.
915 * calculate new value of i_nextsync which determines when
916 * this code is called again.
917 *
918 * this is called only from lmLog().
919 *
920 * PARAMETER: ip - pointer to logs inode.
921 *
922 * RETURN: 0
923 *
924 * serialization: LOG_LOCK() held on entry/exit
925 */
static int lmLogSync(struct jfs_log * log, int nosyncwait)
{
	int logsize;
	int written;		/* written since last syncpt */
	int free;		/* free space left available */
	int delta;		/* additional delta to write normally */
	int more;		/* additional write granted */
	struct lrd lrd;
	int lsn;
	struct logsyncblk *lp;

	/*
	 * forward syncpt
	 */
	/* if last sync is same as last syncpt,
	 * invoke sync point forward processing to update sync.
	 */

	if (log->sync == log->syncpt) {
		LOGSYNC_LOCK(log);
		/* ToDo: push dirty metapages out to disk */
//              bmLogSync(log);

		/* new sync = lsn of the oldest entry still on the
		 * logsynclist, or end-of-log if the list is empty
		 */
		if (list_empty(&log->synclist))
			log->sync = log->lsn;
		else {
			lp = list_entry(log->synclist.next,
					struct logsyncblk, synclist);
			log->sync = lp->lsn;
		}
		LOGSYNC_UNLOCK(log);

	}

	/* if sync is different from last syncpt,
	 * write a SYNCPT record with syncpt = sync.
	 * reset syncpt = sync
	 */
	if (log->sync != log->syncpt) {
		struct super_block *sb = log->sb;
		struct jfs_sb_info *sbi = JFS_SBI(sb);

		/*
		 * We need to make sure all of the "written" metapages
		 * actually make it to disk
		 */
		fsync_inode_data_buffers(sbi->ipbmap);
		fsync_inode_data_buffers(sbi->ipimap);
		fsync_inode_data_buffers(sb->s_bdev->bd_inode);

		lrd.logtid = 0;
		lrd.backchain = 0;
		lrd.type = cpu_to_le16(LOG_SYNCPT);
		lrd.length = 0;
		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
		lsn = lmWriteRecord(log, NULL, &lrd, NULL);

		log->syncpt = log->sync;
	} else
		lsn = log->lsn;

	/*
	 * setup next syncpt trigger (SWAG)
	 */
	logsize = log->logsize;

	logdiff(written, lsn, log);
	free = logsize - written;
	delta = LOGSYNC_DELTA(logsize);
	more = min(free / 2, delta);
	if (more < 2 * LOGPSIZE) {
		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
		/*
		 * log wrapping
		 *
		 * option 1 - panic ? No.!
		 * option 2 - shutdown file systems
		 *            associated with log ?
		 * option 3 - extend log ?
		 */
		/*
		 * option 4 - second chance
		 *
		 * mark log wrapped, and continue.
		 * when all active transactions are completed,
		 * mark log valid for recovery.
		 * if crashed during invalid state, log state
		 * implies invalid log, forcing fsck().
		 */
		/* mark log state log wrap in log superblock */
		/* log->state = LOGWRAP; */

		/* reset sync point computation */
		log->syncpt = log->sync = lsn;
		log->nextsync = delta;
	} else
		/* next syncpt trigger = written + more */
		log->nextsync = written + more;

	/* return if lmLogSync() from outside of transaction, e.g., sync() */
	if (nosyncwait)
		return lsn;

	/* if number of bytes written from last sync point is more
	 * than 1/4 of the log size, stop new transactions from
	 * starting until all current transactions are completed
	 * by setting syncbarrier flag.
	 */
	if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
		set_bit(log_SYNCBARRIER, &log->flag);
		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
			 log->syncpt);
		/*
		 * We may have to initiate group commit
		 */
		jfs_flush_journal(log, 0);
	}

	return lsn;
}
1046
1047
1048 /*
1049 * NAME: lmLogOpen()
1050 *
1051 * FUNCTION: open the log on first open;
1052 * insert filesystem in the active list of the log.
1053 *
1054 * PARAMETER: ipmnt - file system mount inode
1055 * iplog - log inode (out)
1056 *
1057 * RETURN:
1058 *
1059 * serialization:
1060 */
int lmLogOpen(struct super_block *sb, struct jfs_log ** logptr)
{
	int rc;
	struct block_device *bdev;
	struct jfs_log *log;

	if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
		return -ENOMEM;
	memset(log, 0, sizeof(struct jfs_log));
	init_waitqueue_head(&log->syncwait);

	log->sb = sb;		/* This should be a list */

	if (!(JFS_SBI(sb)->mntflag & JFS_INLINELOG))
		goto externalLog;

	/*
	 * in-line log in host file system
	 *
	 * file system to log have 1-to-1 relationship;
	 */

	set_bit(log_INLINELOG, &log->flag);
	log->bdev = sb->s_bdev;
	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
	/* log size stored in fs blocks; convert to log pages */
	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
	    (L2LOGPSIZE - sb->s_blocksize_bits);
	log->l2bsize = sb->s_blocksize_bits;
	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);

	/*
	 * initialize log.
	 */
	if ((rc = lmLogInit(log)))
		goto free;
	goto out;

	/*
	 * external log as separate logical volume
	 *
	 * file systems to log may have n-to-1 relationship;
	 */
      externalLog:

	/*
	 * TODO: Check for already opened log devices
	 */

	if (!(bdev = bdget(kdev_t_to_nr(JFS_SBI(sb)->logdev)))) {
		rc = -ENODEV;
		goto free;
	}

	/* NOTE(review): on failure this skips blkdev_put - assumes
	 * blkdev_get() drops the bdget() reference itself on error;
	 * confirm against fs/block_dev.c
	 */
	if ((rc = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS))) {
		goto free;
	}

	log->bdev = bdev;
	memcpy(log->uuid, JFS_SBI(sb)->loguuid, sizeof(log->uuid));

	/*
	 * initialize log:
	 */
	if ((rc = lmLogInit(log)))
		goto close;

	/*
	 * add file system to log active file system list
	 */
	if ((rc = lmLogFileSystem(log, JFS_SBI(sb)->uuid, 1)))
		goto shutdown;

      out:
	*logptr = log;
	return 0;

	/*
	 * unwind on error (labels fall through: each undoes one more
	 * acquisition than the one below it)
	 */
      shutdown:		/* unwind lbmLogInit() */
	lbmLogShutdown(log);

      close:		/* close external log device */
	blkdev_put(bdev, BDEV_FS);

      free:		/* free log descriptor */
	kfree(log);

	jfs_warn("lmLogOpen: exit(%d)", rc);
	return rc;
}
1152
1153
1154 /*
1155 * NAME: lmLogInit()
1156 *
1157 * FUNCTION: log initialization at first log open.
1158 *
1159 * logredo() (or logformat()) should have been run previously.
1160 * initialize the log inode from log superblock.
1161 * set the log state in the superblock to LOGMOUNT and
1162 * write SYNCPT log record.
1163 *
1164 * PARAMETER: log - log structure
1165 *
1166 * RETURN: 0 - if ok
1167 * -EINVAL - bad log magic number or superblock dirty
1168 * error returned from logwait()
1169 *
1170 * serialization: single first open thread
1171 */
lmLogInit(struct jfs_log * log)1172 int lmLogInit(struct jfs_log * log)
1173 {
1174 int rc = 0;
1175 struct lrd lrd;
1176 struct logsuper *logsuper;
1177 struct lbuf *bpsuper;
1178 struct lbuf *bp;
1179 struct logpage *lp;
1180 int lsn;
1181
1182 jfs_info("lmLogInit: log:0x%p", log);
1183
1184 /*
1185 * log inode is overlaid on generic inode where
1186 * dinode have been zeroed out by iRead();
1187 */
1188
1189 /*
1190 * initialize log i/o
1191 */
1192 if ((rc = lbmLogInit(log)))
1193 return rc;
1194
1195 /*
1196 * validate log superblock
1197 */
1198
1199
1200 if (!test_bit(log_INLINELOG, &log->flag))
1201 log->l2bsize = 12; /* XXX kludge alert XXX */
1202 if ((rc = lbmRead(log, 1, &bpsuper)))
1203 goto errout10;
1204
1205 logsuper = (struct logsuper *) bpsuper->l_ldata;
1206
1207 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1208 jfs_warn("*** Log Format Error ! ***");
1209 rc = -EINVAL;
1210 goto errout20;
1211 }
1212
1213 /* logredo() should have been run successfully. */
1214 if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1215 jfs_warn("*** Log Is Dirty ! ***");
1216 rc = -EINVAL;
1217 goto errout20;
1218 }
1219
1220 /* initialize log inode from log superblock */
1221 if (test_bit(log_INLINELOG,&log->flag)) {
1222 if (log->size != le32_to_cpu(logsuper->size)) {
1223 rc = -EINVAL;
1224 goto errout20;
1225 }
1226 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1227 log, (unsigned long long) log->base, log->size);
1228 } else {
1229 if (memcmp(logsuper->uuid, log->uuid, 16)) {
1230 jfs_warn("wrong uuid on JFS log device");
1231 goto errout20;
1232 }
1233 log->size = le32_to_cpu(logsuper->size);
1234 log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1235 jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1236 log, (unsigned long long) log->base, log->size);
1237 }
1238
1239 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1240 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1241
1242 /* check for disabled journaling to disk */
1243 if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
1244 log->no_integrity = 1;
1245 log->ni_page = log->page;
1246 log->ni_eor = log->eor;
1247 }
1248 else
1249 log->no_integrity = 0;
1250
1251 /*
1252 * initialize for log append write mode
1253 */
1254 /* establish current/end-of-log page/buffer */
1255 if ((rc = lbmRead(log, log->page, &bp)))
1256 goto errout20;
1257
1258 lp = (struct logpage *) bp->l_ldata;
1259
1260 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1261 le32_to_cpu(logsuper->end), log->page, log->eor,
1262 le16_to_cpu(lp->h.eor));
1263
1264 // ASSERT(log->eor == lp->h.eor);
1265
1266 log->bp = bp;
1267 bp->l_pn = log->page;
1268 bp->l_eor = log->eor;
1269
1270 /* initialize the group commit serialization lock */
1271 LOGGC_LOCK_INIT(log);
1272
1273 /* if current page is full, move on to next page */
1274 if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1275 lmNextPage(log);
1276
1277 /* allocate/initialize the log write serialization lock */
1278 LOG_LOCK_INIT(log);
1279
1280 /*
1281 * initialize log syncpoint
1282 */
1283 /*
1284 * write the first SYNCPT record with syncpoint = 0
1285 * (i.e., log redo up to HERE !);
1286 * remove current page from lbm write queue at end of pageout
1287 * (to write log superblock update), but do not release to freelist;
1288 */
1289 lrd.logtid = 0;
1290 lrd.backchain = 0;
1291 lrd.type = cpu_to_le16(LOG_SYNCPT);
1292 lrd.length = 0;
1293 lrd.log.syncpt.sync = 0;
1294 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1295 bp = log->bp;
1296 bp->l_ceor = bp->l_eor;
1297 lp = (struct logpage *) bp->l_ldata;
1298 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1299 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1300 if ((rc = lbmIOWait(bp, 0)))
1301 goto errout30;
1302
1303 /* initialize logsync parameters */
1304 log->logsize = (log->size - 2) << L2LOGPSIZE;
1305 log->lsn = lsn;
1306 log->syncpt = lsn;
1307 log->sync = log->syncpt;
1308 log->nextsync = LOGSYNC_DELTA(log->logsize);
1309
1310 jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1311 log->lsn, log->syncpt, log->sync);
1312
1313 LOGSYNC_LOCK_INIT(log);
1314
1315 INIT_LIST_HEAD(&log->synclist);
1316
1317 log->cqueue.head = log->cqueue.tail = NULL;
1318 log->flush_tblk = NULL;
1319
1320 log->count = 0;
1321
1322 /*
1323 * initialize for lazy/group commit
1324 */
1325 log->clsn = lsn;
1326
1327 /*
1328 * update/write superblock
1329 */
1330 logsuper->state = cpu_to_le32(LOGMOUNT);
1331 log->serial = le32_to_cpu(logsuper->serial) + 1;
1332 logsuper->serial = cpu_to_le32(log->serial);
1333 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1334 if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1335 goto errout30;
1336
1337 return 0;
1338
1339 /*
1340 * unwind on error
1341 */
1342 errout30: /* release log page */
1343 log->wqueue = NULL;
1344 bp->l_wqnext = NULL;
1345 lbmFree(bp);
1346
1347 errout20: /* release log superblock */
1348 lbmFree(bpsuper);
1349
1350 errout10: /* unwind lbmLogInit() */
1351 lbmLogShutdown(log);
1352
1353 jfs_warn("lmLogInit: exit(%d)", rc);
1354 return rc;
1355 }
1356
1357
1358 /*
1359 * NAME: lmLogClose()
1360 *
1361 * FUNCTION: remove file system <ipmnt> from active list of log <iplog>
1362 * and close it on last close.
1363 *
1364 * PARAMETER: sb - superblock
1365 * log - log inode
1366 *
1367 * RETURN: errors from subroutines
1368 *
1369 * serialization:
1370 */
lmLogClose(struct super_block * sb,struct jfs_log * log)1371 int lmLogClose(struct super_block *sb, struct jfs_log * log)
1372 {
1373 int rc;
1374
1375 jfs_info("lmLogClose: log:0x%p", log);
1376
1377 if (!test_bit(log_INLINELOG, &log->flag))
1378 goto externalLog;
1379
1380 /*
1381 * in-line log in host file system
1382 */
1383 rc = lmLogShutdown(log);
1384 goto out;
1385
1386 /*
1387 * external log as separate logical volume
1388 */
1389 externalLog:
1390 lmLogFileSystem(log, JFS_SBI(sb)->uuid, 0);
1391 rc = lmLogShutdown(log);
1392 blkdev_put(log->bdev, BDEV_FS);
1393
1394 out:
1395 kfree(log);
1396 jfs_info("lmLogClose: exit(%d)", rc);
1397 return rc;
1398 }
1399
1400
1401 /*
1402 * NAME: jfs_flush_journal()
1403 *
1404 * FUNCTION: initiate write of any outstanding transactions to the journal
1405 * and optionally wait until they are all written to disk
1406 *
1407 * wait == 0 flush until latest txn is committed, don't wait
1408 * wait == 1 flush until latest txn is committed, wait
1409 * wait > 1 flush until all txn's are complete, wait
1410 */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
	int i;
	struct tblock *target;

	if (!log)
		/* jfs_write_inode may call us during read-only mount */
		return;

	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

	LOGGC_LOCK(log);

	/* latest transaction currently on the commit queue, if any */
	target = log->cqueue.head;

	if (target) {
		/*
		 * This ensures that we will keep writing to the journal as long
		 * as there are unwritten commit records
		 */

		if (test_bit(log_FLUSH, &log->flag)) {
			/*
			 * We're already flushing.
			 * if flush_tblk is NULL, we are flushing everything,
			 * so leave it that way.  Otherwise, update it to the
			 * latest transaction
			 */
			if (log->flush_tblk)
				log->flush_tblk = target;
		} else {
			/* Only flush until latest transaction is committed */
			log->flush_tblk = target;
			set_bit(log_FLUSH, &log->flag);

			/*
			 * Initiate I/O on outstanding transactions
			 */
			if (!(log->cflag & logGC_PAGEOUT)) {
				log->cflag |= logGC_PAGEOUT;
				lmGCwrite(log, 0);
			}
		}
	}
	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
		/* Flush until all activity complete */
		set_bit(log_FLUSH, &log->flag);
		log->flush_tblk = NULL;
	}

	/* sleep until the target transaction's group commit completes;
	 * presumably target->gcwait is woken by the commit path — confirm
	 */
	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
		DECLARE_WAITQUEUE(__wait, current);

		add_wait_queue(&target->gcwait, &__wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		/* drop the lock before sleeping; reacquire after wakeup */
		LOGGC_UNLOCK(log);
		schedule();
		current->state = TASK_RUNNING;
		LOGGC_LOCK(log);
		remove_wait_queue(&target->gcwait, &__wait);
	}
	LOGGC_UNLOCK(log);

	if (wait < 2)
		return;

	/*
	 * If there was recent activity, we may need to wait
	 * for the lazycommit thread to catch up
	 */
	if (log->cqueue.head || !list_empty(&log->synclist)) {
		for (i = 0; i < 800; i++) {	/* Too much? */
			/* poll every quarter second until both lists drain */
			current->state = TASK_INTERRUPTIBLE;
			schedule_timeout(HZ / 4);
			if ((log->cqueue.head == NULL) &&
			    list_empty(&log->synclist))
				break;
		}
	}
	assert(log->cqueue.head == NULL);
	assert(list_empty(&log->synclist));
	clear_bit(log_FLUSH, &log->flag);
}
1494
1495 /*
1496 * NAME: lmLogShutdown()
1497 *
1498 * FUNCTION: log shutdown at last LogClose().
1499 *
1500 * write log syncpt record.
1501 * update super block to set redone flag to 0.
1502 *
1503 * PARAMETER: log - log inode
1504 *
1505 * RETURN: 0 - success
1506 *
1507 * serialization: single last close thread
1508 */
int lmLogShutdown(struct jfs_log * log)
{
	int rc;
	struct lrd lrd;
	int lsn;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;

	jfs_info("lmLogShutdown: log:0x%p", log);

	/* flush all outstanding transactions and wait for completion */
	jfs_flush_journal(log, 2);

	/*
	 * We need to make sure all of the "written" metapages
	 * actually make it to disk
	 */
	fsync_no_super(log->sb->s_dev);

	/*
	 * write the last SYNCPT record with syncpoint = 0
	 * (i.e., log redo up to HERE !)
	 */
	lrd.logtid = 0;
	lrd.backchain = 0;
	lrd.type = cpu_to_le16(LOG_SYNCPT);
	lrd.length = 0;
	lrd.log.syncpt.sync = 0;

	/* check for disabled journaling to disk: restore the real end of
	 * log saved by lmLogInit() and clear no_integrity so the final
	 * SYNCPT and superblock writes below really reach the device
	 * (lbmStartIO() skips i/o while no_integrity is set)
	 */
	if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
		log->no_integrity = 0;
		log->page = log->ni_page;
		log->eor = log->ni_eor;
	}

	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
	lbmIOWait(log->bp, lbmFREE);

	/*
	 * synchronous update log superblock
	 * mark log state as shutdown cleanly
	 * (i.e., Log does not need to be replayed).
	 */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		goto out;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->end = cpu_to_le32(lsn);
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
		 lsn, log->page, log->eor);

      out:
	/*
	 * shutdown per log i/o: release the log buffer pool
	 */
	lbmLogShutdown(log);

	if (rc) {
		jfs_warn("lmLogShutdown: exit(%d)", rc);
	}
	return rc;
}
1581
1582
1583 /*
1584 * NAME: lmLogFileSystem()
1585 *
1586 * FUNCTION: insert (<activate> = true)/remove (<activate> = false)
1587 * file system into/from log active file system list.
1588 *
1589 * PARAMETE: log - pointer to logs inode.
1590 * fsdev - kdev_t of filesystem.
1591 * serial - pointer to returned log serial number
1592 * activate - insert/remove device from active list.
1593 *
1594 * RETURN: 0 - success
1595 * errors returned by vms_iowait().
1596 */
lmLogFileSystem(struct jfs_log * log,char * uuid,int activate)1597 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate)
1598 {
1599 int rc = 0;
1600 int i;
1601 struct logsuper *logsuper;
1602 struct lbuf *bpsuper;
1603
1604 /*
1605 * insert/remove file system device to log active file system list.
1606 */
1607 if ((rc = lbmRead(log, 1, &bpsuper)))
1608 return rc;
1609
1610 logsuper = (struct logsuper *) bpsuper->l_ldata;
1611 if (activate) {
1612 for (i = 0; i < MAX_ACTIVE; i++)
1613 if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1614 memcpy(logsuper->active[i].uuid, uuid, 16);
1615 break;
1616 }
1617 if (i == MAX_ACTIVE) {
1618 jfs_warn("Too many file systems sharing journal!");
1619 lbmFree(bpsuper);
1620 return -EMFILE; /* Is there a better rc? */
1621 }
1622 } else {
1623 for (i = 0; i < MAX_ACTIVE; i++)
1624 if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1625 memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1626 break;
1627 }
1628 if (i == MAX_ACTIVE) {
1629 jfs_warn("Somebody stomped on the journal!");
1630 lbmFree(bpsuper);
1631 return -EIO;
1632 }
1633
1634 }
1635
1636 /*
1637 * synchronous write log superblock:
1638 *
1639 * write sidestream bypassing write queue:
1640 * at file system mount, log super block is updated for
1641 * activation of the file system before any log record
1642 * (MOUNT record) of the file system, and at file system
1643 * unmount, all meta data for the file system has been
1644 * flushed before log super block is updated for deactivation
1645 * of the file system.
1646 */
1647 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1648 rc = lbmIOWait(bpsuper, lbmFREE);
1649
1650 return rc;
1651 }
1652
1653 /*
1654 * log buffer manager (lbm)
1655 * ------------------------
1656 *
1657 * special purpose buffer manager supporting log i/o requirements.
1658 *
1659 * per log write queue:
1660 * log pageout occurs in serial order by fifo write queue and
1661 * restricting to a single i/o in pregress at any one time.
1662 * a circular singly-linked list
1663 * (log->wrqueue points to the tail, and buffers are linked via
1664 * bp->wrqueue field), and
1665 * maintains log page in pageout ot waiting for pageout in serial pageout.
1666 */
1667
1668 /*
1669 * lbmLogInit()
1670 *
1671 * initialize per log I/O setup at lmLogInit()
1672 */
lbmLogInit(struct jfs_log * log)1673 static int lbmLogInit(struct jfs_log * log)
1674 { /* log inode */
1675 int i;
1676 struct lbuf *lbuf;
1677
1678 jfs_info("lbmLogInit: log:0x%p", log);
1679
1680 /* initialize current buffer cursor */
1681 log->bp = NULL;
1682
1683 /* initialize log device write queue */
1684 log->wqueue = NULL;
1685
1686 /*
1687 * Each log has its own buffer pages allocated to it. These are
1688 * not managed by the page cache. This ensures that a transaction
1689 * writing to the log does not block trying to allocate a page from
1690 * the page cache (for the log). This would be bad, since page
1691 * allocation waits on the kswapd thread that may be committing inodes
1692 * which would cause log activity. Was that clear? I'm trying to
1693 * avoid deadlock here.
1694 */
1695 init_waitqueue_head(&log->free_wait);
1696
1697 log->lbuf_free = NULL;
1698
1699 for (i = 0; i < LOGPAGES; i++) {
1700 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1701 if (lbuf == 0)
1702 goto error;
1703 lbuf->l_bh.b_data = lbuf->l_ldata =
1704 (char *) get_zeroed_page(GFP_KERNEL);
1705 if (lbuf->l_ldata == 0) {
1706 kfree(lbuf);
1707 goto error;
1708 }
1709 lbuf->l_log = log;
1710 init_waitqueue_head(&lbuf->l_ioevent);
1711
1712 lbuf->l_bh.b_size = LOGPSIZE;
1713 lbuf->l_bh.b_dev = to_kdev_t(log->bdev->bd_dev);
1714 lbuf->l_bh.b_end_io = lbmIODone;
1715 lbuf->l_bh.b_private = lbuf;
1716 lbuf->l_bh.b_page = virt_to_page(lbuf->l_ldata);
1717 lbuf->l_bh.b_state = 0;
1718 init_waitqueue_head(&lbuf->l_bh.b_wait);
1719
1720 lbuf->l_freelist = log->lbuf_free;
1721 log->lbuf_free = lbuf;
1722 }
1723
1724 return (0);
1725
1726 error:
1727 lbmLogShutdown(log);
1728 return -ENOMEM;
1729 }
1730
1731
1732 /*
1733 * lbmLogShutdown()
1734 *
1735 * finalize per log I/O setup at lmLogShutdown()
1736 */
lbmLogShutdown(struct jfs_log * log)1737 static void lbmLogShutdown(struct jfs_log * log)
1738 {
1739 struct lbuf *lbuf;
1740
1741 jfs_info("lbmLogShutdown: log:0x%p", log);
1742
1743 lbuf = log->lbuf_free;
1744 while (lbuf) {
1745 struct lbuf *next = lbuf->l_freelist;
1746 free_page((unsigned long) lbuf->l_ldata);
1747 kfree(lbuf);
1748 lbuf = next;
1749 }
1750
1751 log->bp = NULL;
1752 }
1753
1754
1755 /*
1756 * lbmAllocate()
1757 *
1758 * allocate an empty log buffer
1759 */
lbmAllocate(struct jfs_log * log,int pn)1760 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1761 {
1762 struct lbuf *bp;
1763 unsigned long flags;
1764
1765 /*
1766 * recycle from log buffer freelist if any
1767 */
1768 LCACHE_LOCK(flags);
1769 LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1770 log->lbuf_free = bp->l_freelist;
1771 LCACHE_UNLOCK(flags);
1772
1773 bp->l_flag = 0;
1774
1775 bp->l_wqnext = NULL;
1776 bp->l_freelist = NULL;
1777
1778 bp->l_pn = pn;
1779 bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1780 bp->l_bh.b_blocknr = bp->l_blkno;
1781 bp->l_ceor = 0;
1782
1783 return bp;
1784 }
1785
1786
1787 /*
1788 * lbmFree()
1789 *
1790 * release a log buffer to freelist
1791 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long irqflags;

	/* take the log-cache lock around the unlocked worker */
	LCACHE_LOCK(irqflags);
	lbmfree(bp);
	LCACHE_UNLOCK(irqflags);
}
1802
lbmfree(struct lbuf * bp)1803 static void lbmfree(struct lbuf * bp)
1804 {
1805 struct jfs_log *log = bp->l_log;
1806
1807 assert(bp->l_wqnext == NULL);
1808
1809 /*
1810 * return the buffer to head of freelist
1811 */
1812 bp->l_freelist = log->lbuf_free;
1813 log->lbuf_free = bp;
1814
1815 wake_up(&log->free_wait);
1816 return;
1817 }
1818
1819
1820 /*
1821 * NAME: lbmRedrive
1822 *
1823 * FUNCTION: add a log buffer to the the log redrive list
1824 *
1825 * PARAMETER:
1826 * bp - log buffer
1827 *
1828 * NOTES:
1829 * Takes log_redrive_lock.
1830 */
lbmRedrive(struct lbuf * bp)1831 static inline void lbmRedrive(struct lbuf *bp)
1832 {
1833 unsigned long flags;
1834
1835 spin_lock_irqsave(&log_redrive_lock, flags);
1836 bp->l_redrive_next = log_redrive_list;
1837 log_redrive_list = bp;
1838 spin_unlock_irqrestore(&log_redrive_lock, flags);
1839
1840 wake_up(&jfs_IO_thread_wait);
1841 }
1842
1843
1844 /*
1845 * lbmRead()
1846 */
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
{
	struct lbuf *bp;

	/*
	 * allocate a log buffer (also computes l_blkno from pn)
	 */
	*bpp = bp = lbmAllocate(log, pn);
	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);

	/* set up the embedded buffer_head for a synchronous pagein */
	bp->l_flag |= lbmREAD;
	bp->l_bh.b_reqnext = NULL;
	clear_bit(BH_Uptodate, &bp->l_bh.b_state);
	lock_buffer(&bp->l_bh);
	set_bit(BH_Mapped, &bp->l_bh.b_state);
	set_bit(BH_Req, &bp->l_bh.b_state);
	bp->l_bh.b_rdev = bp->l_bh.b_dev;
	/* convert log block number to 512-byte device sector */
	bp->l_bh.b_rsector = bp->l_blkno << (log->l2bsize - 9);
	generic_make_request(READ, &bp->l_bh);
	run_task_queue(&tq_disk);

	/* lbmIODone() clears lbmREAD when the pagein completes */
	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

	return 0;
}
1872
1873
1874 /*
1875 * lbmWrite()
1876 *
1877 * buffer at head of pageout queue stays after completion of
1878 * partial-page pageout and redriven by explicit initiation of
1879 * pageout by caller until full-page pageout is completed and
1880 * released.
1881 *
1882 * device driver i/o done redrives pageout of new buffer at
1883 * head of pageout queue when current buffer at head of pageout
1884 * queue is released at the completion of its full-page pageout.
1885 *
1886 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
1887 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
1888 */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
		     int cant_block)
{
	struct lbuf *tail;
	unsigned long flags;

	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);	/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 *	insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			/* empty queue: bp becomes a one-element ring */
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			/* circular singly-linked list; log->wqueue is
			 * the tail, tail->l_wqnext is the head
			 */
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		/* not at the head (or not a write): pageout will be
		 * redriven by lbmIODone() when the head completes
		 */
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		/* caller cannot block: hand the i/o to the jfsIO thread */
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		/* drop the group-commit lock around the i/o submission */
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
1949
1950
1951 /*
1952 * lbmDirectWrite()
1953 *
1954 * initiate pageout bypassing write queue for sidestream
1955 * (e.g., log superblock) write;
1956 */
lbmDirectWrite(struct jfs_log * log,struct lbuf * bp,int flag)1957 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
1958 {
1959 jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
1960 bp, flag, bp->l_pn);
1961
1962 /*
1963 * initialize buffer for device driver
1964 */
1965 bp->l_flag = flag | lbmDIRECT;
1966
1967 /* map the logical block address to physical block address */
1968 bp->l_blkno =
1969 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
1970
1971 /*
1972 * initiate pageout of the page
1973 */
1974 lbmStartIO(bp);
1975 }
1976
1977
1978 /*
1979 * NAME: lbmStartIO()
1980 *
1981 * FUNCTION: Interface to DD strategy routine
1982 *
1983 * RETURN: none
1984 *
1985 * serialization: LCACHE_LOCK() is NOT held during log i/o;
1986 */
static void lbmStartIO(struct lbuf * bp)
{
	jfs_info("lbmStartIO");

	/* set up the embedded buffer_head for pageout */
	bp->l_bh.b_reqnext = NULL;
	set_bit(BH_Dirty, &bp->l_bh.b_state);
	//	lock_buffer(&bp->l_bh);
	/* the buffer must not already be locked for i/o */
	assert(!test_bit(BH_Lock, &bp->l_bh.b_state));
	set_bit(BH_Lock, &bp->l_bh.b_state);

	set_bit(BH_Mapped, &bp->l_bh.b_state);
	set_bit(BH_Req, &bp->l_bh.b_state);
	bp->l_bh.b_rdev = bp->l_bh.b_dev;
	/* convert log block number to 512-byte device sector */
	bp->l_bh.b_rsector = bp->l_blkno << (bp->l_log->l2bsize - 9);

	if (bp->l_log->no_integrity)
		/* don't really do I/O */
		lbmIODone(&bp->l_bh, 1);
	else
		generic_make_request(WRITE, &bp->l_bh);

	INCREMENT(lmStat.submitted);
	run_task_queue(&tq_disk);
}
2011
2012
2013 /*
2014 * lbmIOWait()
2015 */
lbmIOWait(struct lbuf * bp,int flag)2016 static int lbmIOWait(struct lbuf * bp, int flag)
2017 {
2018 unsigned long flags;
2019 int rc = 0;
2020
2021 jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2022
2023 LCACHE_LOCK(flags); /* disable+lock */
2024
2025 LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2026
2027 rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2028
2029 if (flag & lbmFREE)
2030 lbmfree(bp);
2031
2032 LCACHE_UNLOCK(flags); /* unlock+enable */
2033
2034 jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2035 return rc;
2036 }
2037
2038 /*
2039 * lbmIODone()
2040 *
2041 * executed at INTIODONE level
2042 */
static void lbmIODone(struct buffer_head *bh, int uptodate)
{
	struct lbuf *bp = bh->b_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);	/* disable+lock */

	/* i/o is complete: unlock the buffer_head and mark the lbuf done */
	unlock_buffer(&bp->l_bh);
	bp->l_flag |= lbmDONE;

	if (!uptodate) {
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	/*
	 * pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		/* clearing lbmREAD is the condition lbmRead() waits on */
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 * pageout completion
	 *
	 * the bp at the head of write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from head of pageout queue, and redrive pageout with
	 * the new buffer at head of pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

	/* sidestream (direct) writes are not on the write queue:
	 * just wake the initiator and return
	 */
	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of next page at head of write queue:
			 * redrive next page without any bound tblk
			 * (i.e., page w/o any COMMIT records), or
			 * first page of new group commit which has been
			 * queued after current page (subsequent pageout
			 * is performed synchronously, except page without
			 * any COMMITs) by lmGroupCommit() as indicated
			 * by lbmWRITE flag;
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 * synchronous pageout:
	 *
	 * buffer has not necessarily been removed from write queue
	 * (e.g., synchronous write of partial-page with COMMIT):
	 * leave buffer for i/o initiator to dispose
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 * Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 * asynchronous pageout:
	 *
	 * buffer must have been removed from write queue:
	 * insert buffer at head of freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}
2182
int jfsIOWait(void *arg)
{
	struct lbuf *bp;

	lock_kernel();

	/* detach from the spawning process and name this kernel thread */
	daemonize();
	current->tty = NULL;
	strcpy(current->comm, "jfsIO");

	unlock_kernel();

	/* block all signals: this thread only exits via jfs_stop_threads */
	spin_lock_irq(&current->sigmask_lock);
	sigfillset(&current->blocked);
	recalc_sigpending(current);
	spin_unlock_irq(&current->sigmask_lock);

	/* tell the creator the thread is up */
	complete(&jfsIOwait);

	do {
		DECLARE_WAITQUEUE(wq, current);

		/* drain the redrive list queued by lbmRedrive(),
		 * dropping the spinlock around each lbmStartIO()
		 */
		spin_lock_irq(&log_redrive_lock);
		while ((bp = log_redrive_list)) {
			log_redrive_list = bp->l_redrive_next;
			bp->l_redrive_next = NULL;
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(bp);
			spin_lock_irq(&log_redrive_lock);
		}
		/* sleep until lbmRedrive() wakes us with more work */
		add_wait_queue(&jfs_IO_thread_wait, &wq);
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irq(&log_redrive_lock);
		schedule();
		current->state = TASK_RUNNING;
		remove_wait_queue(&jfs_IO_thread_wait, &wq);
	} while (!jfs_stop_threads);

	jfs_info("jfsIOWait being killed!");
	complete_and_exit(&jfsIOwait, 0);
}
2224
2225 /*
2226 * NAME: lmLogFormat()/jfs_logform()
2227 *
2228 * FUNCTION: format file system log
2229 *
2230 * PARAMETERS:
2231 * log - volume log
2232 * logAddress - start address of log space in FS block
2233 * logSize - length of log space in FS block;
2234 *
2235 * RETURN: 0 - success
2236 * -EIO - i/o error
2237 *
2238 * XXX: We're synchronously writing one page at a time. This needs to
2239 * be improved by writing multiple pages at once.
2240 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi = JFS_SBI(log->sb);
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	/* allocate a log buffer; the same buffer is reused for the
	 * superblock and every data page written below
	 */
	bp = lbmAllocate(log, 1);

	/* number of log pages covered by logSize fs blocks */
	npages = logSize >> sbi->l2nbperpage;

	/*
	 * log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: A SYNC log record is written
	 *	    into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 * init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	/* end of log = just past the SYNCPT record written to page 2 */
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	/* synchronous sidestream write of the superblock page */
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 * init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lpsn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log is maintained as
	 * a circular file for the log records;
	 * lpsn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE. In order for binary search
	 * still work in find log end process, we have to simulate the
	 * log wrap situation at the log format time.
	 * The 1st log page written will have the highest lpsn. Then
	 * the succeeding log pages will have ascending order of
	 * the lspn starting from 0, ... (N-2)
	 */
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize 1st log page to be written: lpsn = N - 1,
	 * write a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
	 *
	 * XXX: one synchronous page write per iteration; batching
	 * multiple pages per i/o would be faster (see header note)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 * finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}
2356
2357 #ifdef CONFIG_JFS_STATISTICS
int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
		     int *eof, void *data)
{
	int len;
	off_t begin;

	/* format the log manager statistics into the caller's buffer */
	len = sprintf(buffer,
		      "JFS Logmgr stats\n"
		      "================\n"
		      "commits = %d\n"
		      "writes submitted = %d\n"
		      "writes completed = %d\n"
		      "full pages submitted = %d\n"
		      "partial pages submitted = %d\n",
		      lmStat.commit,
		      lmStat.submitted,
		      lmStat.pagedone,
		      lmStat.full_page,
		      lmStat.partial_page);

	/* standard /proc read bookkeeping: expose the window starting
	 * at offset, clamped to length, and flag eof when it all fits
	 */
	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
2392 #endif /* CONFIG_JFS_STATISTICS */
2393