1 /*
2  *   Copyright (c) International Business Machines Corp., 2000-2003
3  *   Portions Copyright (c) Christoph Hellwig, 2001-2002
4  *
5  *   This program is free software;  you can redistribute it and/or modify
6  *   it under the terms of the GNU General Public License as published by
7  *   the Free Software Foundation; either version 2 of the License, or
8  *   (at your option) any later version.
9  *
10  *   This program is distributed in the hope that it will be useful,
11  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13  *   the GNU General Public License for more details.
14  *
15  *   You should have received a copy of the GNU General Public License
16  *   along with this program;  if not, write to the Free Software
17  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 #ifndef	_H_JFS_LOGMGR
20 #define _H_JFS_LOGMGR
21 
22 #include "jfs_filsys.h"
23 #include "jfs_lock.h"
24 
25 /*
26  *	log manager configuration parameters
27  */
28 
/*
 * log page geometry
 *
 * LOGPSIZE is expressed through its log2 so that the two values are
 * defined in one place.
 */
#define	L2LOGPSIZE	12			/* log2 of the log page size */
#define	LOGPSIZE	(1 << L2LOGPSIZE)	/* log page size: 4096 bytes */

/* Log pages per mounted file system */
#define LOGPAGES	16
34 
35 /*
36  *	log logical volume
37  *
38  * a log is used to make the commit operation on journalled
39  * files within the same logical volume group atomic.
40  * a log is implemented with a logical volume.
41  * there is one log per logical volume group.
42  *
43  * block 0 of the log logical volume is not used (ipl etc).
44  * block 1 contains a log "superblock" and is used by logFormat(),
45  * lmLogInit(), lmLogShutdown(), and logRedo() to record status
46  * of the log but is not otherwise used during normal processing.
47  * blocks 2 - (N-1) are used to contain log records.
48  *
49  * when a volume group is varied-on-line, logRedo() must have
50  * been executed before the file systems (logical volumes) in
51  * the volume group can be mounted.
52  */
/*
 *	log superblock constants
 *
 * The log superblock lives in block 1 of the log logical volume;
 * log records start at block 2.
 */

/* block numbers within the log logical volume */
#define	LOGSUPER_B	1		/* block holding the log superblock */
#define	LOGSTART_B	2		/* first block holding log records */

/* log superblock identification */
#define	LOGMAGIC	0x87654321	/* magic number of a log lv */
#define	LOGVERSION	1		/* log version number */

/* Max active file systems sharing log */
#define MAX_ACTIVE	128
63 
64 struct logsuper {
65 	u32 magic;		/* 4: log lv identifier */
66 	s32 version;		/* 4: version number */
67 	s32 serial;		/* 4: log open/mount counter */
68 	s32 size;		/* 4: size in number of LOGPSIZE blocks */
69 	s32 bsize;		/* 4: logical block size in byte */
70 	s32 l2bsize;		/* 4: log2 of bsize */
71 
72 	u32 flag;		/* 4: option */
73 	u32 state;		/* 4: state - see below */
74 
75 	s32 end;		/* 4: addr of last log record set by logredo */
76 	char uuid[16];		/* 16: 128-bit journal uuid */
77 	char label[16];		/* 16: journal label */
78 	struct {
79 		char uuid[16];
80 	} active[MAX_ACTIVE];	/* 2048: active file systems list */
81 };
82 
/* all-zero 128-bit uuid: 16 NUL bytes (plus the implicit terminator) */
#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
84 
85 /* log flag: commit option (see jfs_filsys.h) */
86 
/*
 * log state (see the state field of the log superblock)
 */
#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
#define	LOGREDONE	1	/* log shutdown by lmLogShutdown();
				 * log redo completed by logredo()
				 */
#define	LOGWRAP		2	/* log wrapped */
#define	LOGREADERR	3	/* log read error detected in logredo() */
94 
95 
96 /*
97  *	log logical page
98  *
99  * (this comment should be rewritten !)
100  * the header and trailer structures (h,t) will normally have
101  * the same page and eor value.
102  * An exception to this occurs when a complete page write is not
103  * accomplished on a power failure. Since the hardware may "split write"
104  * sectors in the page, any out of order sequence may occur during powerfail
105  * and needs to be recognized during log replay.  The xor value is
106  * an "exclusive or" of all log words in the page up to eor.  This
107  * 32 bit eor is stored with the top 16 bits in the header and the
108  * bottom 16 bits in the trailer.  logredo can easily recognize pages
109  * that were not completed by reconstructing this eor and checking
110  * the log page.
111  *
112  * Previous versions of the operating system did not allow split
113  * writes and detected partially written records in logredo by
114  * ordering the updates to the header, trailer, and the move of data
115  * into the logdata area.  The order: (1) data is moved (2) header
116  * is updated (3) trailer is updated.  In logredo, when the header
117  * differed from the trailer, the header and trailer were reconciled
118  * as follows: if h.page != t.page they were set to the smaller of
119  * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
120  * h.eor != t.eor they were set to the smaller of their two values.
121  */
122 struct logpage {
123 	struct {		/* header */
124 		s32 page;	/* 4: log sequence page number */
125 		s16 rsrvd;	/* 2: */
126 		s16 eor;	/* 2: end-of-log offset of lasrt record write */
127 	} h;
128 
129 	s32 data[LOGPSIZE / 4 - 4];	/* log record area */
130 
131 	struct {		/* trailer */
132 		s32 page;	/* 4: normally the same as h.page */
133 		s16 rsrvd;	/* 2: */
134 		s16 eor;	/* 2: normally the same as h.eor */
135 	} t;
136 };
137 
/* sizes, in bytes, of the header and trailer bracketing each log page */
#define LOGPHDRSIZE	8	/* log page header size */
#define LOGPTLRSIZE	8	/* log page trailer size */
140 
141 
142 /*
143  *	log record
144  *
145  * (this comment should be rewritten !)
 * jfs uses only "after" log records (only a single writer is allowed
 * in a page, pages are written to temporary paging space if
 * they must be written to disk before commit, and i/o is
 * scheduled for modified pages to their home location after
 * the log records containing the after values and the commit
 * record is written to the log on disk, undo discards the copy
 * in main-memory.)
153  *
 * a log record consists of a data area of variable length followed by
 * a descriptor of fixed size LOGRDSIZE bytes.
 * the data area is rounded up to an integral number of 4-byte words and
 * must be no longer than LOGPSIZE.
 * the size of the descriptor is a multiple of 4 bytes and it is aligned
 * on a 4-byte boundary.
160  * records are packed one after the other in the data area of log pages.
161  * (sometimes a DUMMY record is inserted so that at least one record ends
162  * on every page or the longest record is placed on at most two pages).
163  * the field eor in page header/trailer points to the byte following
164  * the last record on a page.
165  */
166 
/*
 * log record types
 */
#define LOG_COMMIT		0x8000	/* transaction commit */
#define LOG_SYNCPT		0x4000	/* log sync point */
#define LOG_MOUNT		0x2000	/* file system mount */
#define LOG_REDOPAGE		0x0800	/* apply after-image */
#define LOG_NOREDOPAGE		0x0080	/* the page is freed */
#define LOG_NOREDOINOEXT	0x0040	/* the inode extent is freed */
#define LOG_UPDATEMAP		0x0008	/* update block allocation map */
#define LOG_NOREDOFILE		0x0001	/* this file is freed */
176 
/*
 * REDOPAGE/NOREDOPAGE log record data type
 *
 * One bit per kind of data; 0x0008 is not assigned.
 */
#define LOG_INODE		0x0001
#define LOG_XTREE		0x0002
#define LOG_DTREE		0x0004
#define LOG_BTROOT		0x0010
#define LOG_EA			0x0020
#define LOG_ACL			0x0040
#define LOG_DATA		0x0080
#define LOG_NEW			0x0100
#define LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */
189 
/*
 * UPDATEMAP log record descriptor type
 *
 * Low nibble: FREE variants; high nibble: ALLOC variants.
 */
#define LOG_ALLOCXADLIST	0x0080
#define LOG_ALLOCPXDLIST	0x0040
#define LOG_ALLOCXAD		0x0020
#define LOG_ALLOCPXD		0x0010
#define LOG_FREEXADLIST		0x0008
#define LOG_FREEPXDLIST		0x0004
#define LOG_FREEXAD		0x0002
#define LOG_FREEPXD		0x0001
199 
200 
201 struct lrd {
202 	/*
203 	 * type independent area
204 	 */
205 	s32 logtid;		/* 4: log transaction identifier */
206 	s32 backchain;		/* 4: ptr to prev record of same transaction */
207 	u16 type;		/* 2: record type */
208 	s16 length;		/* 2: length of data in record (in byte) */
209 	u32 aggregate;		/* 4: file system lv/aggregate */
210 	/* (16) */
211 
212 	/*
213 	 * type dependent area (20)
214 	 */
215 	union {
216 
217 		/*
218 		 *      COMMIT: commit
219 		 *
220 		 * transaction commit: no type-dependent information;
221 		 */
222 
223 		/*
224 		 *      REDOPAGE: after-image
225 		 *
226 		 * apply after-image;
227 		 *
228 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
229 		 */
230 		struct {
231 			u32 fileset;	/* 4: fileset number */
232 			u32 inode;	/* 4: inode number */
233 			u16 type;	/* 2: REDOPAGE record type */
234 			s16 l2linesize;	/* 2: log2 of line size */
235 			pxd_t pxd;	/* 8: on-disk page pxd */
236 		} redopage;	/* (20) */
237 
238 		/*
239 		 *      NOREDOPAGE: the page is freed
240 		 *
241 		 * do not apply after-image records which precede this record
242 		 * in the log with the same page block number to this page.
243 		 *
244 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
245 		 */
246 		struct {
247 			s32 fileset;	/* 4: fileset number */
248 			u32 inode;	/* 4: inode number */
249 			u16 type;	/* 2: NOREDOPAGE record type */
250 			s16 rsrvd;	/* 2: reserved */
251 			pxd_t pxd;	/* 8: on-disk page pxd */
252 		} noredopage;	/* (20) */
253 
254 		/*
255 		 *      UPDATEMAP: update block allocation map
256 		 *
257 		 * either in-line PXD,
258 		 * or     out-of-line  XADLIST;
259 		 *
260 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
261 		 */
262 		struct {
263 			u32 fileset;	/* 4: fileset number */
264 			u32 inode;	/* 4: inode number */
265 			u16 type;	/* 2: UPDATEMAP record type */
266 			s16 nxd;	/* 2: number of extents */
267 			pxd_t pxd;	/* 8: pxd */
268 		} updatemap;	/* (20) */
269 
270 		/*
271 		 *      NOREDOINOEXT: the inode extent is freed
272 		 *
273 		 * do not apply after-image records which precede this
274 		 * record in the log with the any of the 4 page block
275 		 * numbers in this inode extent.
276 		 *
277 		 * NOTE: The fileset and pxd fields MUST remain in
278 		 *       the same fields in the REDOPAGE record format.
279 		 *
280 		 */
281 		struct {
282 			s32 fileset;	/* 4: fileset number */
283 			s32 iagnum;	/* 4: IAG number     */
284 			s32 inoext_idx;	/* 4: inode extent index */
285 			pxd_t pxd;	/* 8: on-disk page pxd */
286 		} noredoinoext;	/* (20) */
287 
288 		/*
289 		 *      SYNCPT: log sync point
290 		 *
291 		 * replay log upto syncpt address specified;
292 		 */
293 		struct {
294 			s32 sync;	/* 4: syncpt address (0 = here) */
295 		} syncpt;
296 
297 		/*
298 		 *      MOUNT: file system mount
299 		 *
300 		 * file system mount: no type-dependent information;
301 		 */
302 
303 		/*
304 		 *      ? FREEXTENT: free specified extent(s)
305 		 *
306 		 * free specified extent(s) from block allocation map
307 		 * N.B.: nextents should be length of data/sizeof(xad_t)
308 		 */
309 		struct {
310 			s32 type;	/* 4: FREEXTENT record type */
311 			s32 nextent;	/* 4: number of extents */
312 
313 			/* data: PXD or XAD list */
314 		} freextent;
315 
316 		/*
317 		 *      ? NOREDOFILE: this file is freed
318 		 *
319 		 * do not apply records which precede this record in the log
320 		 * with the same inode number.
321 		 *
322 		 * NOREDILE must be the first to be written at commit
323 		 * (last to be read in logredo()) - it prevents
324 		 * replay of preceding updates of all preceding generations
325 		 * of the inumber esp. the on-disk inode itself,
326 		 * but does NOT prevent
327 		 * replay of the
328 		 */
329 		struct {
330 			s32 fileset;	/* 4: fileset number */
331 			u32 inode;	/* 4: inode number */
332 		} noredofile;
333 
334 		/*
335 		 *      ? NEWPAGE:
336 		 *
337 		 * metadata type dependent
338 		 */
339 		struct {
340 			s32 fileset;	/* 4: fileset number */
341 			u32 inode;	/* 4: inode number */
342 			s32 type;	/* 4: NEWPAGE record type */
343 			pxd_t pxd;	/* 8: on-disk page pxd */
344 		} newpage;
345 
346 		/*
347 		 *      ? DUMMY: filler
348 		 *
349 		 * no type-dependent information
350 		 */
351 	} log;
352 };					/* (36) */
353 
354 #define	LOGRDSIZE	(sizeof(struct lrd))
355 
356 /*
357  *	line vector descriptor
358  */
359 struct lvd {
360 	s16 offset;
361 	s16 length;
362 };
363 
364 
365 /*
366  *	log logical volume
367  */
368 struct jfs_log {
369 
370 	struct super_block *sb;	/* 4: This is used to sync metadata
371 				 *    before writing syncpt.  Will
372 				 *    need to be a list if we share
373 				 *    the log between fs's
374 				 */
375 	struct block_device *bdev; /* 4: log lv pointer */
376 	s32 serial;		/* 4: log mount serial number */
377 
378 	s64 base;		/* @8: log extent address (inline log ) */
379 	int size;		/* 4: log size in log page (in page) */
380 	int l2bsize;		/* 4: log2 of bsize */
381 
382 	long flag;		/* 4: flag */
383 
384 	struct lbuf *lbuf_free;	/* 4: free lbufs */
385 	wait_queue_head_t free_wait;	/* 4: */
386 
387 	/* log write */
388 	int logtid;		/* 4: log tid */
389 	int page;		/* 4: page number of eol page */
390 	int eor;		/* 4: eor of last record in eol page */
391 	struct lbuf *bp;	/* 4: current log page buffer */
392 
393 	struct semaphore loglock;	/* 4: log write serialization lock */
394 
395 	/* syncpt */
396 	int nextsync;		/* 4: bytes to write before next syncpt */
397 	int active;		/* 4: */
398 	wait_queue_head_t syncwait;	/* 4: */
399 
400 	/* commit */
401 	uint cflag;		/* 4: */
402 	struct {		/* 8: FIFO commit queue header */
403 		struct tblock *head;
404 		struct tblock *tail;
405 	} cqueue;
406 	struct tblock *flush_tblk; /* tblk we're waiting on for flush */
407 	int gcrtc;		/* 4: GC_READY transaction count */
408 	struct tblock *gclrt;	/* 4: latest GC_READY transaction */
409 	spinlock_t gclock;	/* 4: group commit lock */
410 	int logsize;		/* 4: log data area size in byte */
411 	int lsn;		/* 4: end-of-log */
412 	int clsn;		/* 4: clsn */
413 	int syncpt;		/* 4: addr of last syncpt record */
414 	int sync;		/* 4: addr from last logsync() */
415 	struct list_head synclist;	/* 8: logsynclist anchor */
416 	spinlock_t synclock;	/* 4: synclist lock */
417 	struct lbuf *wqueue;	/* 4: log pageout queue */
418 	int count;		/* 4: count */
419 	char uuid[16];		/* 16: 128-bit uuid of log device */
420 
421 	int no_integrity;	/* flag to disable journaling to disk */
422 	int ni_page;		/* backup of page for nointegrity option */
423 	int ni_eor;		/* backup of eor for nointegrity option */
424 };
425 
/*
 * Log flag
 *
 * NOTE(review): these are consecutive integers rather than masks, so
 * they look like bit numbers for the log's flag word - confirm
 * against the users of these names.
 */
#define log_INLINELOG		1
#define log_SYNCBARRIER		2
#define log_QUIESCE		3
#define log_FLUSH		4
433 
/*
 * group commit flag
 */

/* flag kept in jfs_log */
#define logGC_PAGEOUT		0x00000001

/* flags kept in tblock/lbuf; one bit each */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	/* D230860 */
#define tblkGC_UNLOCKED		0x0200	/* D230860 */
451 
452 /*
453  *		log cache buffer header
454  */
455 struct lbuf {
456 	struct buffer_head l_bh;	/* for doing I/O */
457 	struct jfs_log *l_log;		/* 4: log associated with buffer */
458 
459 	/*
460 	 * data buffer base area
461 	 */
462 	uint l_flag;		/* 4: pageout control flags */
463 
464 	struct lbuf *l_wqnext;	/* 4: write queue link */
465 	struct lbuf *l_freelist;	/* 4: freelistlink */
466 
467 	int l_pn;		/* 4: log page number */
468 	int l_eor;		/* 4: log record eor */
469 	int l_ceor;		/* 4: committed log record eor */
470 
471 	s64 l_blkno;		/* 8: log page block number */
472 	caddr_t l_ldata;	/* 4: data page */
473 
474 	wait_queue_head_t l_ioevent;	/* 4: i/o done event */
475 	struct page *l_page;	/* The page itself */
476 };
477 
/*
 * The redrive list reuses the l_freelist member as its link field:
 * l_redrive_next is simply another name for l_freelist.
 */
#define l_redrive_next	l_freelist
480 
481 /*
482  *	logsynclist block
483  *
484  * common logsyncblk prefix for jbuf_t and tblock
485  */
486 struct logsyncblk {
487 	u16 xflag;		/* flags */
488 	u16 flag;		/* only meaninful in tblock */
489 	lid_t lid;		/* lock id */
490 	s32 lsn;		/* log sequence number */
491 	struct list_head synclist;	/* log sync list link */
492 };
493 
/*
 *	logsynclist serialization (per log)
 *
 * Thin wrappers around the spinlock calls, applied to the synclock
 * member of the log passed in.
 */
#define LOGSYNC_LOCK_INIT(log)	spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log)	spin_lock(&(log)->synclock)
#define LOGSYNC_UNLOCK(log)	spin_unlock(&(log)->synclock)
501 
/*
 * logdiff - compute the difference in bytes of lsn from sync point
 *
 * diff: integer lvalue that receives the result (must be signed, as
 *       it is tested for < 0)
 * lsn:  log address to measure
 * log:  pointer to the log; its syncpt and logsize members are read
 *
 * diff is set to lsn - log->syncpt; when that is negative,
 * log->logsize is added to it.
 *
 * The body is wrapped in do { } while (0) so that "logdiff(...);"
 * is a single statement and can be used as the unbraced body of an
 * if/else.  diff is evaluated up to three times and log up to twice,
 * so neither argument may have side effects.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
509 
510 extern int lmLogOpen(struct super_block *sb, struct jfs_log ** log);
511 extern int lmLogClose(struct super_block *sb, struct jfs_log * log);
512 extern int lmLogShutdown(struct jfs_log * log);
513 extern int lmLogInit(struct jfs_log * log);
514 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
515 extern void jfs_flush_journal(struct jfs_log * log, int wait);
516 
517 #endif				/* _H_JFS_LOGMGR */
518