1 /*
2 * Copyright (C) International Business Machines Corp., 2000-2004
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include <linux/fs.h>
20 #include "jfs_incore.h"
21 #include "jfs_superblock.h"
22 #include "jfs_dmap.h"
23 #include "jfs_imap.h"
24 #include "jfs_lock.h"
25 #include "jfs_metapage.h"
26 #include "jfs_superblock.h"
27 #include "jfs_debug.h"
28
29 /*
30 * Debug code for double-checking block map
31 */
32 /* #define _JFS_DEBUG_DMAP 1 */
33
34 #ifdef _JFS_DEBUG_DMAP
35 #define DBINITMAP(size,ipbmap,results) \
36 DBinitmap(size,ipbmap,results)
37 #define DBALLOC(dbmap,mapsize,blkno,nblocks) \
38 DBAlloc(dbmap,mapsize,blkno,nblocks)
39 #define DBFREE(dbmap,mapsize,blkno,nblocks) \
40 DBFree(dbmap,mapsize,blkno,nblocks)
41 #define DBALLOCCK(dbmap,mapsize,blkno,nblocks) \
42 DBAllocCK(dbmap,mapsize,blkno,nblocks)
43 #define DBFREECK(dbmap,mapsize,blkno,nblocks) \
44 DBFreeCK(dbmap,mapsize,blkno,nblocks)
45
46 static void DBinitmap(s64, struct inode *, u32 **);
47 static void DBAlloc(uint *, s64, s64, s64);
48 static void DBFree(uint *, s64, s64, s64);
49 static void DBAllocCK(uint *, s64, s64, s64);
50 static void DBFreeCK(uint *, s64, s64, s64);
51 #else
52 #define DBINITMAP(size,ipbmap,results)
53 #define DBALLOC(dbmap, mapsize, blkno, nblocks)
54 #define DBFREE(dbmap, mapsize, blkno, nblocks)
55 #define DBALLOCCK(dbmap, mapsize, blkno, nblocks)
56 #define DBFREECK(dbmap, mapsize, blkno, nblocks)
57 #endif /* _JFS_DEBUG_DMAP */
58
59 /*
60 * SERIALIZATION of the Block Allocation Map.
61 *
62 * the working state of the block allocation map is accessed in
63 * two directions:
64 *
65 * 1) allocation and free requests that start at the dmap
66 * level and move up through the dmap control pages (i.e.
67 * the vast majority of requests).
68 *
69 * 2) allocation requests that start at dmap control page
70 * level and work down towards the dmaps.
71 *
72 * the serialization scheme used here is as follows.
73 *
74 * requests which start at the bottom are serialized against each
75 * other through buffers and each requests holds onto its buffers
76 * as it works it way up from a single dmap to the required level
77 * of dmap control page.
78 * requests that start at the top are serialized against each other
79 * and request that start from the bottom by the multiple read/single
80 * write inode lock of the bmap inode. requests starting at the top
81 * take this lock in write mode while request starting at the bottom
82 * take the lock in read mode. a single top-down request may proceed
83 * exclusively while multiple bottoms-up requests may proceed
84 * simultaneously (under the protection of busy buffers).
85 *
86 * in addition to information found in dmaps and dmap control pages,
87 * the working state of the block allocation map also includes read/
88 * write information maintained in the bmap descriptor (i.e. total
89 * free block count, allocation group level free block counts).
90 * a single exclusive lock (BMAP_LOCK) is used to guard this information
91 * in the face of multiple-bottoms up requests.
92 * (lock ordering: IREAD_LOCK, BMAP_LOCK);
93 *
94 * accesses to the persistent state of the block allocation map (limited
95 * to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
96 */
97
98 #define BMAP_LOCK_INIT(bmp) init_MUTEX(&bmp->db_bmaplock)
99 #define BMAP_LOCK(bmp) down(&bmp->db_bmaplock)
100 #define BMAP_UNLOCK(bmp) up(&bmp->db_bmaplock)
101
102 /*
103 * forward references
104 */
105 static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
106 int nblocks);
107 static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
108 static void dbBackSplit(dmtree_t * tp, int leafno);
109 static void dbJoin(dmtree_t * tp, int leafno, int newval);
110 static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
111 static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
112 int level);
113 static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
114 static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
115 int nblocks);
116 static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno,
117 int nblocks,
118 int l2nb, s64 * results);
119 static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
120 int nblocks);
121 static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks,
122 int l2nb,
123 s64 * results);
124 static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb,
125 s64 * results);
126 static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
127 s64 * results);
128 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
129 static int dbFindBits(u32 word, int l2nb);
130 static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
131 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
132 static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
133 int nblocks);
134 static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
135 int nblocks);
136 static int dbMaxBud(u8 * cp);
137 s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
138 static int blkstol2(s64 nb);
139
140 static int cntlz(u32 value);
141 static int cnttz(u32 word);
142
143 static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
144 int nblocks);
145 static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks);
146 static int dbInitDmapTree(struct dmap * dp);
147 static int dbInitTree(struct dmaptree * dtp);
148 static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
149 static int dbGetL2AGSize(s64 nblocks);
150
151 /*
152 * buddy table
153 *
154 * table used for determining buddy sizes within characters of
155 * dmap bitmap words. the characters themselves serve as indexes
156 * into the table, with the table elements yielding the maximum
157 * binary buddy of free bits within the character.
158 */
159 static s8 budtab[256] = {
160 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
161 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
166 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
167 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
168 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
170 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
171 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
172 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
174 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
175 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
176 };
177
178
179 /*
180 * NAME: dbMount()
181 *
182 * FUNCTION: initializate the block allocation map.
183 *
184 * memory is allocated for the in-core bmap descriptor and
185 * the in-core descriptor is initialized from disk.
186 *
187 * PARAMETERS:
188 * ipbmap - pointer to in-core inode for the block map.
189 *
190 * RETURN VALUES:
191 * 0 - success
192 * -ENOMEM - insufficient memory
193 * -EIO - i/o error
194 */
dbMount(struct inode * ipbmap)195 int dbMount(struct inode *ipbmap)
196 {
197 struct bmap *bmp;
198 struct dbmap *dbmp_le;
199 struct metapage *mp;
200 int i;
201
202 /*
203 * allocate/initialize the in-memory bmap descriptor
204 */
205 /* allocate memory for the in-memory bmap descriptor */
206 bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL);
207 if (bmp == NULL)
208 return -ENOMEM;
209
210 /* read the on-disk bmap descriptor. */
211 mp = read_metapage(ipbmap,
212 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
213 PSIZE, 0);
214 if (mp == NULL) {
215 kfree(bmp);
216 return -EIO;
217 }
218
219 /* copy the on-disk bmap descriptor to its in-memory version. */
220 dbmp_le = (struct dbmap *) mp->data;
221 bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
222 bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
223 bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
224 bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
225 bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
226 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
227 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
228 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
229 bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth);
230 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
231 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
232 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
233 for (i = 0; i < MAXAG; i++)
234 bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
235 bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
236 bmp->db_maxfreebud = dbmp_le->dn_maxfreebud;
237
238 /* release the buffer. */
239 release_metapage(mp);
240
241 /* bind the bmap inode and the bmap descriptor to each other. */
242 bmp->db_ipbmap = ipbmap;
243 JFS_SBI(ipbmap->i_sb)->bmap = bmp;
244
245 memset(bmp->db_active, 0, sizeof(bmp->db_active));
246 DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap);
247
248 /*
249 * allocate/initialize the bmap lock
250 */
251 BMAP_LOCK_INIT(bmp);
252
253 return (0);
254 }
255
256
257 /*
258 * NAME: dbUnmount()
259 *
260 * FUNCTION: terminate the block allocation map in preparation for
261 * file system unmount.
262 *
263 * the in-core bmap descriptor is written to disk and
264 * the memory for this descriptor is freed.
265 *
266 * PARAMETERS:
267 * ipbmap - pointer to in-core inode for the block map.
268 *
269 * RETURN VALUES:
270 * 0 - success
271 * -EIO - i/o error
272 */
dbUnmount(struct inode * ipbmap,int mounterror)273 int dbUnmount(struct inode *ipbmap, int mounterror)
274 {
275 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
276 int i;
277
278 if (!(mounterror || isReadOnly(ipbmap)))
279 dbSync(ipbmap);
280
281 /*
282 * Invalidate the page cache buffers
283 */
284 truncate_inode_pages(ipbmap->i_mapping, 0);
285
286 /*
287 * Sanity Check
288 */
289 for (i = 0; i < bmp->db_numag; i++)
290 if (atomic_read(&bmp->db_active[i]))
291 printk(KERN_ERR "dbUnmount: db_active[%d] = %d\n",
292 i, atomic_read(&bmp->db_active[i]));
293
294 /* free the memory for the in-memory bmap. */
295 kfree(bmp);
296
297 return (0);
298 }
299
300 /*
301 * dbSync()
302 */
dbSync(struct inode * ipbmap)303 int dbSync(struct inode *ipbmap)
304 {
305 struct dbmap *dbmp_le;
306 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
307 struct metapage *mp;
308 int i;
309
310 /*
311 * write bmap global control page
312 */
313 /* get the buffer for the on-disk bmap descriptor. */
314 mp = read_metapage(ipbmap,
315 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
316 PSIZE, 0);
317 if (mp == NULL) {
318 jfs_err("dbSync: read_metapage failed!");
319 return -EIO;
320 }
321 /* copy the in-memory version of the bmap to the on-disk version */
322 dbmp_le = (struct dbmap *) mp->data;
323 dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize);
324 dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree);
325 dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage);
326 dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag);
327 dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel);
328 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
329 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
330 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
331 dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth);
332 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
333 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
334 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
335 for (i = 0; i < MAXAG; i++)
336 dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]);
337 dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize);
338 dbmp_le->dn_maxfreebud = bmp->db_maxfreebud;
339
340 /* write the buffer */
341 write_metapage(mp);
342
343 /*
344 * write out dirty pages of bmap
345 */
346 fsync_inode_data_buffers(ipbmap);
347
348 ipbmap->i_state |= I_DIRTY;
349 diWriteSpecial(ipbmap, 0);
350
351 return (0);
352 }
353
354
355 /*
356 * NAME: dbFree()
357 *
358 * FUNCTION: free the specified block range from the working block
359 * allocation map.
360 *
361 * the blocks will be free from the working map one dmap
362 * at a time.
363 *
364 * PARAMETERS:
365 * ip - pointer to in-core inode;
366 * blkno - starting block number to be freed.
367 * nblocks - number of blocks to be freed.
368 *
369 * RETURN VALUES:
370 * 0 - success
371 * -EIO - i/o error
372 */
dbFree(struct inode * ip,s64 blkno,s64 nblocks)373 int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
374 {
375 struct metapage *mp;
376 struct dmap *dp;
377 int nb, rc;
378 s64 lblkno, rem;
379 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
380 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
381
382 IREAD_LOCK(ipbmap);
383
384 /* block to be freed better be within the mapsize. */
385 if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
386 IREAD_UNLOCK(ipbmap);
387 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
388 (unsigned long long) blkno,
389 (unsigned long long) nblocks);
390 jfs_error(ip->i_sb,
391 "dbFree: block to be freed is outside the map");
392 return -EIO;
393 }
394
395 /*
396 * free the blocks a dmap at a time.
397 */
398 mp = NULL;
399 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
400 /* release previous dmap if any */
401 if (mp) {
402 write_metapage(mp);
403 }
404
405 /* get the buffer for the current dmap. */
406 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
407 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
408 if (mp == NULL) {
409 IREAD_UNLOCK(ipbmap);
410 return -EIO;
411 }
412 dp = (struct dmap *) mp->data;
413
414 /* determine the number of blocks to be freed from
415 * this dmap.
416 */
417 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
418
419 DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
420
421 /* free the blocks. */
422 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
423 release_metapage(mp);
424 IREAD_UNLOCK(ipbmap);
425 return (rc);
426 }
427
428 DBFREE(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
429 }
430
431 /* write the last buffer. */
432 write_metapage(mp);
433
434 IREAD_UNLOCK(ipbmap);
435
436 return (0);
437 }
438
439
440 /*
441 * NAME: dbUpdatePMap()
442 *
443 * FUNCTION: update the allocation state (free or allocate) of the
444 * specified block range in the persistent block allocation map.
445 *
446 * the blocks will be updated in the persistent map one
447 * dmap at a time.
448 *
449 * PARAMETERS:
450 * ipbmap - pointer to in-core inode for the block map.
451 * free - TRUE if block range is to be freed from the persistent
452 * map; FALSE if it is to be allocated.
453 * blkno - starting block number of the range.
454 * nblocks - number of contiguous blocks in the range.
455 * tblk - transaction block;
456 *
457 * RETURN VALUES:
458 * 0 - success
459 * -EIO - i/o error
460 */
461 int
dbUpdatePMap(struct inode * ipbmap,int free,s64 blkno,s64 nblocks,struct tblock * tblk)462 dbUpdatePMap(struct inode *ipbmap,
463 int free, s64 blkno, s64 nblocks, struct tblock * tblk)
464 {
465 int nblks, dbitno, wbitno, rbits;
466 int word, nbits, nwords;
467 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
468 s64 lblkno, rem, lastlblkno;
469 u32 mask;
470 struct dmap *dp;
471 struct metapage *mp;
472 struct jfs_log *log;
473 int lsn, difft, diffp;
474
475 /* the blocks better be within the mapsize. */
476 if (blkno + nblocks > bmp->db_mapsize) {
477 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
478 (unsigned long long) blkno,
479 (unsigned long long) nblocks);
480 jfs_error(ipbmap->i_sb,
481 "dbUpdatePMap: blocks are outside the map");
482 return -EIO;
483 }
484
485 /* compute delta of transaction lsn from log syncpt */
486 lsn = tblk->lsn;
487 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
488 logdiff(difft, lsn, log);
489
490 /*
491 * update the block state a dmap at a time.
492 */
493 mp = NULL;
494 lastlblkno = 0;
495 for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
496 /* get the buffer for the current dmap. */
497 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
498 if (lblkno != lastlblkno) {
499 if (mp) {
500 write_metapage(mp);
501 }
502
503 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE,
504 0);
505 if (mp == NULL)
506 return -EIO;
507 }
508 dp = (struct dmap *) mp->data;
509
510 /* determine the bit number and word within the dmap of
511 * the starting block. also determine how many blocks
512 * are to be updated within this dmap.
513 */
514 dbitno = blkno & (BPERDMAP - 1);
515 word = dbitno >> L2DBWORD;
516 nblks = min(rem, (s64)BPERDMAP - dbitno);
517
518 /* update the bits of the dmap words. the first and last
519 * words may only have a subset of their bits updated. if
520 * this is the case, we'll work against that word (i.e.
521 * partial first and/or last) only in a single pass. a
522 * single pass will also be used to update all words that
523 * are to have all their bits updated.
524 */
525 for (rbits = nblks; rbits > 0;
526 rbits -= nbits, dbitno += nbits) {
527 /* determine the bit number within the word and
528 * the number of bits within the word.
529 */
530 wbitno = dbitno & (DBWORD - 1);
531 nbits = min(rbits, DBWORD - wbitno);
532
533 /* check if only part of the word is to be updated. */
534 if (nbits < DBWORD) {
535 /* update (free or allocate) the bits
536 * in this word.
537 */
538 mask =
539 (ONES << (DBWORD - nbits) >> wbitno);
540 if (free)
541 dp->pmap[word] &=
542 cpu_to_le32(~mask);
543 else
544 dp->pmap[word] |=
545 cpu_to_le32(mask);
546
547 word += 1;
548 } else {
549 /* one or more words are to have all
550 * their bits updated. determine how
551 * many words and how many bits.
552 */
553 nwords = rbits >> L2DBWORD;
554 nbits = nwords << L2DBWORD;
555
556 /* update (free or allocate) the bits
557 * in these words.
558 */
559 if (free)
560 memset(&dp->pmap[word], 0,
561 nwords * 4);
562 else
563 memset(&dp->pmap[word], (int) ONES,
564 nwords * 4);
565
566 word += nwords;
567 }
568 }
569
570 /*
571 * update dmap lsn
572 */
573 if (lblkno == lastlblkno)
574 continue;
575
576 lastlblkno = lblkno;
577
578 if (mp->lsn != 0) {
579 /* inherit older/smaller lsn */
580 logdiff(diffp, mp->lsn, log);
581 if (difft < diffp) {
582 mp->lsn = lsn;
583
584 /* move bp after tblock in logsync list */
585 LOGSYNC_LOCK(log);
586 list_del(&mp->synclist);
587 list_add(&mp->synclist, &tblk->synclist);
588 LOGSYNC_UNLOCK(log);
589 }
590
591 /* inherit younger/larger clsn */
592 LOGSYNC_LOCK(log);
593 logdiff(difft, tblk->clsn, log);
594 logdiff(diffp, mp->clsn, log);
595 if (difft > diffp)
596 mp->clsn = tblk->clsn;
597 LOGSYNC_UNLOCK(log);
598 } else {
599 mp->log = log;
600 mp->lsn = lsn;
601
602 /* insert bp after tblock in logsync list */
603 LOGSYNC_LOCK(log);
604
605 log->count++;
606 list_add(&mp->synclist, &tblk->synclist);
607
608 mp->clsn = tblk->clsn;
609 LOGSYNC_UNLOCK(log);
610 }
611 }
612
613 /* write the last buffer. */
614 if (mp) {
615 write_metapage(mp);
616 }
617
618 return (0);
619 }
620
621
622 /*
623 * NAME: dbNextAG()
624 *
625 * FUNCTION: find the preferred allocation group for new allocations.
626 *
627 * Within the allocation groups, we maintain a preferred
628 * allocation group which consists of a group with at least
629 * average free space. It is the preferred group that we target
630 * new inode allocation towards. The tie-in between inode
631 * allocation and block allocation occurs as we allocate the
632 * first (data) block of an inode and specify the inode (block)
633 * as the allocation hint for this block.
634 *
635 * We try to avoid having more than one open file growing in
636 * an allocation group, as this will lead to fragmentation.
637 * This differs from the old OS/2 method of trying to keep
638 * empty ags around for large allocations.
639 *
640 * PARAMETERS:
641 * ipbmap - pointer to in-core inode for the block map.
642 *
643 * RETURN VALUES:
644 * the preferred allocation group number.
645 */
dbNextAG(struct inode * ipbmap)646 int dbNextAG(struct inode *ipbmap)
647 {
648 s64 avgfree;
649 int agpref;
650 s64 hwm = 0;
651 int i;
652 int next_best = -1;
653 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
654
655 BMAP_LOCK(bmp);
656
657 /* determine the average number of free blocks within the ags. */
658 avgfree = (u32)bmp->db_nfree / bmp->db_numag;
659
660 /*
661 * if the current preferred ag does not have an active allocator
662 * and has at least average freespace, return it
663 */
664 agpref = bmp->db_agpref;
665 if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
666 (bmp->db_agfree[agpref] >= avgfree))
667 goto unlock;
668
669 /* From the last preferred ag, find the next one with at least
670 * average free space.
671 */
672 for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
673 if (agpref == bmp->db_numag)
674 agpref = 0;
675
676 if (atomic_read(&bmp->db_active[agpref]))
677 /* open file is currently growing in this ag */
678 continue;
679 if (bmp->db_agfree[agpref] >= avgfree) {
680 /* Return this one */
681 bmp->db_agpref = agpref;
682 goto unlock;
683 } else if (bmp->db_agfree[agpref] > hwm) {
684 /* Less than avg. freespace, but best so far */
685 hwm = bmp->db_agfree[agpref];
686 next_best = agpref;
687 }
688 }
689
690 /*
691 * If no inactive ag was found with average freespace, use the
692 * next best
693 */
694 if (next_best != -1)
695 bmp->db_agpref = next_best;
696 /* else leave db_agpref unchanged */
697 unlock:
698 BMAP_UNLOCK(bmp);
699
700 /* return the preferred group.
701 */
702 return (bmp->db_agpref);
703 }
704
705 /*
706 * NAME: dbAlloc()
707 *
708 * FUNCTION: attempt to allocate a specified number of contiguous free
709 * blocks from the working allocation block map.
710 *
711 * the block allocation policy uses hints and a multi-step
712 * approach.
713 *
714 * for allocation requests smaller than the number of blocks
715 * per dmap, we first try to allocate the new blocks
716 * immediately following the hint. if these blocks are not
717 * available, we try to allocate blocks near the hint. if
718 * no blocks near the hint are available, we next try to
719 * allocate within the same dmap as contains the hint.
720 *
721 * if no blocks are available in the dmap or the allocation
722 * request is larger than the dmap size, we try to allocate
723 * within the same allocation group as contains the hint. if
724 * this does not succeed, we finally try to allocate anywhere
725 * within the aggregate.
726 *
727 * we also try to allocate anywhere within the aggregate for
728 * for allocation requests larger than the allocation group
729 * size or requests that specify no hint value.
730 *
731 * PARAMETERS:
732 * ip - pointer to in-core inode;
733 * hint - allocation hint.
734 * nblocks - number of contiguous blocks in the range.
735 * results - on successful return, set to the starting block number
736 * of the newly allocated contiguous range.
737 *
738 * RETURN VALUES:
739 * 0 - success
740 * -ENOSPC - insufficient disk resources
741 * -EIO - i/o error
742 */
dbAlloc(struct inode * ip,s64 hint,s64 nblocks,s64 * results)743 int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
744 {
745 int rc, agno;
746 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
747 struct bmap *bmp;
748 struct metapage *mp;
749 s64 lblkno, blkno;
750 struct dmap *dp;
751 int l2nb;
752 s64 mapSize;
753 int writers;
754
755 /* assert that nblocks is valid */
756 assert(nblocks > 0);
757
758 #ifdef _STILL_TO_PORT
759 /* DASD limit check F226941 */
760 if (OVER_LIMIT(ip, nblocks))
761 return -ENOSPC;
762 #endif /* _STILL_TO_PORT */
763
764 /* get the log2 number of blocks to be allocated.
765 * if the number of blocks is not a log2 multiple,
766 * it will be rounded up to the next log2 multiple.
767 */
768 l2nb = BLKSTOL2(nblocks);
769
770 bmp = JFS_SBI(ip->i_sb)->bmap;
771
772 //retry: /* serialize w.r.t.extendfs() */
773 mapSize = bmp->db_mapsize;
774
775 /* the hint should be within the map */
776 if (hint >= mapSize) {
777 jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map");
778 return -EIO;
779 }
780
781 /* if the number of blocks to be allocated is greater than the
782 * allocation group size, try to allocate anywhere.
783 */
784 if (l2nb > bmp->db_agl2size) {
785 IWRITE_LOCK(ipbmap);
786
787 rc = dbAllocAny(bmp, nblocks, l2nb, results);
788 if (rc == 0) {
789 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results,
790 nblocks);
791 }
792
793 goto write_unlock;
794 }
795
796 /*
797 * If no hint, let dbNextAG recommend an allocation group
798 */
799 if (hint == 0)
800 goto pref_ag;
801
802 /* we would like to allocate close to the hint. adjust the
803 * hint to the block following the hint since the allocators
804 * will start looking for free space starting at this point.
805 */
806 blkno = hint + 1;
807
808 if (blkno >= bmp->db_mapsize)
809 goto pref_ag;
810
811 agno = blkno >> bmp->db_agl2size;
812
813 /* check if blkno crosses over into a new allocation group.
814 * if so, check if we should allow allocations within this
815 * allocation group.
816 */
817 if ((blkno & (bmp->db_agsize - 1)) == 0)
818 /* check if the AG is currenly being written to.
819 * if so, call dbNextAG() to find a non-busy
820 * AG with sufficient free space.
821 */
822 if (atomic_read(&bmp->db_active[agno]))
823 goto pref_ag;
824
825 /* check if the allocation request size can be satisfied from a
826 * single dmap. if so, try to allocate from the dmap containing
827 * the hint using a tiered strategy.
828 */
829 if (nblocks <= BPERDMAP) {
830 IREAD_LOCK(ipbmap);
831
832 /* get the buffer for the dmap containing the hint.
833 */
834 rc = -EIO;
835 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
836 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
837 if (mp == NULL)
838 goto read_unlock;
839
840 dp = (struct dmap *) mp->data;
841
842 /* first, try to satisfy the allocation request with the
843 * blocks beginning at the hint.
844 */
845 if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
846 != -ENOSPC) {
847 if (rc == 0) {
848 *results = blkno;
849 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
850 *results, nblocks);
851 mark_metapage_dirty(mp);
852 }
853
854 release_metapage(mp);
855 goto read_unlock;
856 }
857
858 writers = atomic_read(&bmp->db_active[agno]);
859 if ((writers > 1) ||
860 ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
861 /*
862 * Someone else is writing in this allocation
863 * group. To avoid fragmenting, try another ag
864 */
865 release_metapage(mp);
866 IREAD_UNLOCK(ipbmap);
867 goto pref_ag;
868 }
869
870 /* next, try to satisfy the allocation request with blocks
871 * near the hint.
872 */
873 if ((rc =
874 dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
875 != -ENOSPC) {
876 if (rc == 0) {
877 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
878 *results, nblocks);
879 mark_metapage_dirty(mp);
880 }
881
882 release_metapage(mp);
883 goto read_unlock;
884 }
885
886 /* try to satisfy the allocation request with blocks within
887 * the same dmap as the hint.
888 */
889 if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
890 != -ENOSPC) {
891 if (rc == 0) {
892 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
893 *results, nblocks);
894 mark_metapage_dirty(mp);
895 }
896
897 release_metapage(mp);
898 goto read_unlock;
899 }
900
901 release_metapage(mp);
902 IREAD_UNLOCK(ipbmap);
903 }
904
905 /* try to satisfy the allocation request with blocks within
906 * the same allocation group as the hint.
907 */
908 IWRITE_LOCK(ipbmap);
909 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results))
910 != -ENOSPC) {
911 if (rc == 0)
912 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
913 *results, nblocks);
914 goto write_unlock;
915 }
916 IWRITE_UNLOCK(ipbmap);
917
918
919 pref_ag:
920 /*
921 * Let dbNextAG recommend a preferred allocation group
922 */
923 agno = dbNextAG(ipbmap);
924 IWRITE_LOCK(ipbmap);
925
926 /* Try to allocate within this allocation group. if that fails, try to
927 * allocate anywhere in the map.
928 */
929 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
930 rc = dbAllocAny(bmp, nblocks, l2nb, results);
931 if (rc == 0) {
932 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results, nblocks);
933 }
934
935 write_unlock:
936 IWRITE_UNLOCK(ipbmap);
937
938 return (rc);
939
940 read_unlock:
941 IREAD_UNLOCK(ipbmap);
942
943 return (rc);
944 }
945
946 #ifdef _NOTYET
947 /*
948 * NAME: dbAllocExact()
949 *
950 * FUNCTION: try to allocate the requested extent;
951 *
952 * PARAMETERS:
953 * ip - pointer to in-core inode;
954 * blkno - extent address;
955 * nblocks - extent length;
956 *
957 * RETURN VALUES:
958 * 0 - success
959 * -ENOSPC - insufficient disk resources
960 * -EIO - i/o error
961 */
dbAllocExact(struct inode * ip,s64 blkno,int nblocks)962 int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
963 {
964 int rc;
965 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
966 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
967 struct dmap *dp;
968 s64 lblkno;
969 struct metapage *mp;
970
971 IREAD_LOCK(ipbmap);
972
973 /*
974 * validate extent request:
975 *
976 * note: defragfs policy:
977 * max 64 blocks will be moved.
978 * allocation request size must be satisfied from a single dmap.
979 */
980 if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) {
981 IREAD_UNLOCK(ipbmap);
982 return -EINVAL;
983 }
984
985 if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) {
986 /* the free space is no longer available */
987 IREAD_UNLOCK(ipbmap);
988 return -ENOSPC;
989 }
990
991 /* read in the dmap covering the extent */
992 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
993 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
994 if (mp == NULL) {
995 IREAD_UNLOCK(ipbmap);
996 return -EIO;
997 }
998 dp = (struct dmap *) mp->data;
999
1000 /* try to allocate the requested extent */
1001 rc = dbAllocNext(bmp, dp, blkno, nblocks);
1002
1003 IREAD_UNLOCK(ipbmap);
1004
1005 if (rc == 0) {
1006 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
1007 mark_metapage_dirty(mp);
1008 }
1009 release_metapage(mp);
1010
1011 return (rc);
1012 }
1013 #endif /* _NOTYET */
1014
1015 /*
1016 * NAME: dbReAlloc()
1017 *
1018 * FUNCTION: attempt to extend a current allocation by a specified
1019 * number of blocks.
1020 *
1021 * this routine attempts to satisfy the allocation request
1022 * by first trying to extend the existing allocation in
1023 * place by allocating the additional blocks as the blocks
1024 * immediately following the current allocation. if these
1025 * blocks are not available, this routine will attempt to
1026 * allocate a new set of contiguous blocks large enough
1027 * to cover the existing allocation plus the additional
1028 * number of blocks required.
1029 *
1030 * PARAMETERS:
1031 * ip - pointer to in-core inode requiring allocation.
1032 * blkno - starting block of the current allocation.
1033 * nblocks - number of contiguous blocks within the current
1034 * allocation.
1035 * addnblocks - number of blocks to add to the allocation.
1036 * results - on successful return, set to the starting block number
1037 * of the existing allocation if the existing allocation
1038 * was extended in place or to a newly allocated contiguous
1039 * range if the existing allocation could not be extended
1040 * in place.
1041 *
1042 * RETURN VALUES:
1043 * 0 - success
1044 * -ENOSPC - insufficient disk resources
1045 * -EIO - i/o error
1046 */
1047 int
dbReAlloc(struct inode * ip,s64 blkno,s64 nblocks,s64 addnblocks,s64 * results)1048 dbReAlloc(struct inode *ip,
1049 s64 blkno, s64 nblocks, s64 addnblocks, s64 * results)
1050 {
1051 int rc;
1052
1053 /* try to extend the allocation in place.
1054 */
1055 if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) {
1056 *results = blkno;
1057 return (0);
1058 } else {
1059 if (rc != -ENOSPC)
1060 return (rc);
1061 }
1062
1063 /* could not extend the allocation in place, so allocate a
1064 * new set of blocks for the entire request (i.e. try to get
1065 * a range of contiguous blocks large enough to cover the
1066 * existing allocation plus the additional blocks.)
1067 */
1068 return (dbAlloc
1069 (ip, blkno + nblocks - 1, addnblocks + nblocks, results));
1070 }
1071
1072
1073 /*
1074 * NAME: dbExtend()
1075 *
1076 * FUNCTION: attempt to extend a current allocation by a specified
1077 * number of blocks.
1078 *
1079 * this routine attempts to satisfy the allocation request
1080 * by first trying to extend the existing allocation in
1081 * place by allocating the additional blocks as the blocks
1082 * immediately following the current allocation.
1083 *
1084 * PARAMETERS:
1085 * ip - pointer to in-core inode requiring allocation.
1086 * blkno - starting block of the current allocation.
1087 * nblocks - number of contiguous blocks within the current
1088 * allocation.
1089 * addnblocks - number of blocks to add to the allocation.
1090 *
1091 * RETURN VALUES:
1092 * 0 - success
1093 * -ENOSPC - insufficient disk resources
1094 * -EIO - i/o error
1095 */
dbExtend(struct inode * ip,s64 blkno,s64 nblocks,s64 addnblocks)1096 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1097 {
1098 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
1099 s64 lblkno, lastblkno, extblkno;
1100 uint rel_block;
1101 struct metapage *mp;
1102 struct dmap *dp;
1103 int rc;
1104 struct inode *ipbmap = sbi->ipbmap;
1105 struct bmap *bmp;
1106
1107 /*
1108 * We don't want a non-aligned extent to cross a page boundary
1109 */
1110 if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
1111 (rel_block + nblocks + addnblocks > sbi->nbperpage))
1112 return -ENOSPC;
1113
1114 /* get the last block of the current allocation */
1115 lastblkno = blkno + nblocks - 1;
1116
1117 /* determine the block number of the block following
1118 * the existing allocation.
1119 */
1120 extblkno = lastblkno + 1;
1121
1122 IREAD_LOCK(ipbmap);
1123
1124 /* better be within the file system */
1125 bmp = sbi->bmap;
1126 if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
1127 IREAD_UNLOCK(ipbmap);
1128 jfs_error(ip->i_sb,
1129 "dbExtend: the block is outside the filesystem");
1130 return -EIO;
1131 }
1132
1133 /* we'll attempt to extend the current allocation in place by
1134 * allocating the additional blocks as the blocks immediately
1135 * following the current allocation. we only try to extend the
1136 * current allocation in place if the number of additional blocks
1137 * can fit into a dmap, the last block of the current allocation
1138 * is not the last block of the file system, and the start of the
1139 * inplace extension is not on an allocation group boundry.
1140 */
1141 if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
1142 (extblkno & (bmp->db_agsize - 1)) == 0) {
1143 IREAD_UNLOCK(ipbmap);
1144 return -ENOSPC;
1145 }
1146
1147 /* get the buffer for the dmap containing the first block
1148 * of the extension.
1149 */
1150 lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
1151 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
1152 if (mp == NULL) {
1153 IREAD_UNLOCK(ipbmap);
1154 return -EIO;
1155 }
1156
1157 DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
1158 dp = (struct dmap *) mp->data;
1159
1160 /* try to allocate the blocks immediately following the
1161 * current allocation.
1162 */
1163 rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);
1164
1165 IREAD_UNLOCK(ipbmap);
1166
1167 /* were we successful ? */
1168 if (rc == 0) {
1169 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, extblkno,
1170 addnblocks);
1171 write_metapage(mp);
1172 } else
1173 /* we were not successful */
1174 release_metapage(mp);
1175
1176
1177 return (rc);
1178 }
1179
1180
1181 /*
1182 * NAME: dbAllocNext()
1183 *
1184 * FUNCTION: attempt to allocate the blocks of the specified block
1185 * range within a dmap.
1186 *
1187 * PARAMETERS:
1188 * bmp - pointer to bmap descriptor
1189 * dp - pointer to dmap.
1190 * blkno - starting block number of the range.
1191 * nblocks - number of contiguous free blocks of the range.
1192 *
1193 * RETURN VALUES:
1194 * 0 - success
1195 * -ENOSPC - insufficient disk resources
1196 * -EIO - i/o error
1197 *
1198 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1199 */
dbAllocNext(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)1200 static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
1201 int nblocks)
1202 {
1203 int dbitno, word, rembits, nb, nwords, wbitno, nw;
1204 int l2size;
1205 s8 *leaf;
1206 u32 mask;
1207
1208 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
1209 jfs_error(bmp->db_ipbmap->i_sb,
1210 "dbAllocNext: Corrupt dmap page");
1211 return -EIO;
1212 }
1213
1214 /* pick up a pointer to the leaves of the dmap tree.
1215 */
1216 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
1217
1218 /* determine the bit number and word within the dmap of the
1219 * starting block.
1220 */
1221 dbitno = blkno & (BPERDMAP - 1);
1222 word = dbitno >> L2DBWORD;
1223
1224 /* check if the specified block range is contained within
1225 * this dmap.
1226 */
1227 if (dbitno + nblocks > BPERDMAP)
1228 return -ENOSPC;
1229
1230 /* check if the starting leaf indicates that anything
1231 * is free.
1232 */
1233 if (leaf[word] == NOFREE)
1234 return -ENOSPC;
1235
1236 /* check the dmaps words corresponding to block range to see
1237 * if the block range is free. not all bits of the first and
1238 * last words may be contained within the block range. if this
1239 * is the case, we'll work against those words (i.e. partial first
1240 * and/or last) on an individual basis (a single pass) and examine
1241 * the actual bits to determine if they are free. a single pass
1242 * will be used for all dmap words fully contained within the
1243 * specified range. within this pass, the leaves of the dmap
1244 * tree will be examined to determine if the blocks are free. a
1245 * single leaf may describe the free space of multiple dmap
1246 * words, so we may visit only a subset of the actual leaves
1247 * corresponding to the dmap words of the block range.
1248 */
1249 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
1250 /* determine the bit number within the word and
1251 * the number of bits within the word.
1252 */
1253 wbitno = dbitno & (DBWORD - 1);
1254 nb = min(rembits, DBWORD - wbitno);
1255
1256 /* check if only part of the word is to be examined.
1257 */
1258 if (nb < DBWORD) {
1259 /* check if the bits are free.
1260 */
1261 mask = (ONES << (DBWORD - nb) >> wbitno);
1262 if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
1263 return -ENOSPC;
1264
1265 word += 1;
1266 } else {
1267 /* one or more dmap words are fully contained
1268 * within the block range. determine how many
1269 * words and how many bits.
1270 */
1271 nwords = rembits >> L2DBWORD;
1272 nb = nwords << L2DBWORD;
1273
1274 /* now examine the appropriate leaves to determine
1275 * if the blocks are free.
1276 */
1277 while (nwords > 0) {
1278 /* does the leaf describe any free space ?
1279 */
1280 if (leaf[word] < BUDMIN)
1281 return -ENOSPC;
1282
1283 /* determine the l2 number of bits provided
1284 * by this leaf.
1285 */
1286 l2size =
1287 min((int)leaf[word], NLSTOL2BSZ(nwords));
1288
1289 /* determine how many words were handled.
1290 */
1291 nw = BUDSIZE(l2size, BUDMIN);
1292
1293 nwords -= nw;
1294 word += nw;
1295 }
1296 }
1297 }
1298
1299 /* allocate the blocks.
1300 */
1301 return (dbAllocDmap(bmp, dp, blkno, nblocks));
1302 }
1303
1304
1305 /*
1306 * NAME: dbAllocNear()
1307 *
1308 * FUNCTION: attempt to allocate a number of contiguous free blocks near
1309 * a specified block (hint) within a dmap.
1310 *
1311 * starting with the dmap leaf that covers the hint, we'll
1312 * check the next four contiguous leaves for sufficient free
1313 * space. if sufficient free space is found, we'll allocate
1314 * the desired free space.
1315 *
1316 * PARAMETERS:
1317 * bmp - pointer to bmap descriptor
1318 * dp - pointer to dmap.
1319 * blkno - block number to allocate near.
1320 * nblocks - actual number of contiguous free blocks desired.
1321 * l2nb - log2 number of contiguous free blocks desired.
1322 * results - on successful return, set to the starting block number
1323 * of the newly allocated range.
1324 *
1325 * RETURN VALUES:
1326 * 0 - success
1327 * -ENOSPC - insufficient disk resources
1328 * -EIO - i/o error
1329 *
1330 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1331 */
1332 static int
dbAllocNear(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks,int l2nb,s64 * results)1333 dbAllocNear(struct bmap * bmp,
1334 struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
1335 {
1336 int word, lword, rc;
1337 s8 *leaf;
1338
1339 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
1340 jfs_error(bmp->db_ipbmap->i_sb,
1341 "dbAllocNear: Corrupt dmap page");
1342 return -EIO;
1343 }
1344
1345 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
1346
1347 /* determine the word within the dmap that holds the hint
1348 * (i.e. blkno). also, determine the last word in the dmap
1349 * that we'll include in our examination.
1350 */
1351 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
1352 lword = min(word + 4, LPERDMAP);
1353
1354 /* examine the leaves for sufficient free space.
1355 */
1356 for (; word < lword; word++) {
1357 /* does the leaf describe sufficient free space ?
1358 */
1359 if (leaf[word] < l2nb)
1360 continue;
1361
1362 /* determine the block number within the file system
1363 * of the first block described by this dmap word.
1364 */
1365 blkno = le64_to_cpu(dp->start) + (word << L2DBWORD);
1366
1367 /* if not all bits of the dmap word are free, get the
1368 * starting bit number within the dmap word of the required
1369 * string of free bits and adjust the block number with the
1370 * value.
1371 */
1372 if (leaf[word] < BUDMIN)
1373 blkno +=
1374 dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb);
1375
1376 /* allocate the blocks.
1377 */
1378 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
1379 *results = blkno;
1380
1381 return (rc);
1382 }
1383
1384 return -ENOSPC;
1385 }
1386
1387
1388 /*
1389 * NAME: dbAllocAG()
1390 *
1391 * FUNCTION: attempt to allocate the specified number of contiguous
1392 * free blocks within the specified allocation group.
1393 *
1394 * unless the allocation group size is equal to the number
1395 * of blocks per dmap, the dmap control pages will be used to
1396 * find the required free space, if available. we start the
1397 * search at the highest dmap control page level which
1398 * distinctly describes the allocation group's free space
1399 * (i.e. the highest level at which the allocation group's
1400 * free space is not mixed in with that of any other group).
1401 * in addition, we start the search within this level at a
1402 * height of the dmapctl dmtree at which the nodes distinctly
1403 * describe the allocation group's free space. at this height,
1404 * the allocation group's free space may be represented by 1
1405 * or two sub-trees, depending on the allocation group size.
1406 * we search the top nodes of these subtrees left to right for
1407 * sufficient free space. if sufficient free space is found,
1408 * the subtree is searched to find the leftmost leaf that
1409 * has free space. once we have made it to the leaf, we
1410 * move the search to the next lower level dmap control page
1411 * corresponding to this leaf. we continue down the dmap control
1412 * pages until we find the dmap that contains or starts the
1413 * sufficient free space and we allocate at this dmap.
1414 *
1415 * if the allocation group size is equal to the dmap size,
1416 * we'll start at the dmap corresponding to the allocation
1417 * group and attempt the allocation at this level.
1418 *
1419 * the dmap control page search is also not performed if the
1420 * allocation group is completely free and we go to the first
1421 * dmap of the allocation group to do the allocation. this is
1422 * done because the allocation group may be part (not the first
1423 * part) of a larger binary buddy system, causing the dmap
1424 * control pages to indicate no free space (NOFREE) within
1425 * the allocation group.
1426 *
1427 * PARAMETERS:
1428 * bmp - pointer to bmap descriptor
1429 * agno - allocation group number.
1430 * nblocks - actual number of contiguous free blocks desired.
1431 * l2nb - log2 number of contiguous free blocks desired.
1432 * results - on successful return, set to the starting block number
1433 * of the newly allocated range.
1434 *
1435 * RETURN VALUES:
1436 * 0 - success
1437 * -ENOSPC - insufficient disk resources
1438 * -EIO - i/o error
1439 *
1440 * note: IWRITE_LOCK(ipmap) held on entry/exit;
1441 */
1442 static int
dbAllocAG(struct bmap * bmp,int agno,s64 nblocks,int l2nb,s64 * results)1443 dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1444 {
1445 struct metapage *mp;
1446 struct dmapctl *dcp;
1447 int rc, ti, i, k, m, n, agperlev;
1448 s64 blkno, lblkno;
1449 int budmin;
1450
1451 /* allocation request should not be for more than the
1452 * allocation group size.
1453 */
1454 if (l2nb > bmp->db_agl2size) {
1455 jfs_error(bmp->db_ipbmap->i_sb,
1456 "dbAllocAG: allocation request is larger than the "
1457 "allocation group size");
1458 return -EIO;
1459 }
1460
1461 /* determine the starting block number of the allocation
1462 * group.
1463 */
1464 blkno = (s64) agno << bmp->db_agl2size;
1465
1466 /* check if the allocation group size is the minimum allocation
1467 * group size or if the allocation group is completely free. if
1468 * the allocation group size is the minimum size of BPERDMAP (i.e.
1469 * 1 dmap), there is no need to search the dmap control page (below)
1470 * that fully describes the allocation group since the allocation
1471 * group is already fully described by a dmap. in this case, we
1472 * just call dbAllocCtl() to search the dmap tree and allocate the
1473 * required space if available.
1474 *
1475 * if the allocation group is completely free, dbAllocCtl() is
1476 * also called to allocate the required space. this is done for
1477 * two reasons. first, it makes no sense searching the dmap control
1478 * pages for free space when we know that free space exists. second,
1479 * the dmap control pages may indicate that the allocation group
1480 * has no free space if the allocation group is part (not the first
1481 * part) of a larger binary buddy system.
1482 */
1483 if (bmp->db_agsize == BPERDMAP
1484 || bmp->db_agfree[agno] == bmp->db_agsize) {
1485 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1486 if ((rc == -ENOSPC) &&
1487 (bmp->db_agfree[agno] == bmp->db_agsize)) {
1488 printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
1489 (unsigned long long) blkno,
1490 (unsigned long long) nblocks);
1491 jfs_error(bmp->db_ipbmap->i_sb,
1492 "dbAllocAG: dbAllocCtl failed in free AG");
1493 }
1494 return (rc);
1495 }
1496
1497 /* the buffer for the dmap control page that fully describes the
1498 * allocation group.
1499 */
1500 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
1501 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1502 if (mp == NULL)
1503 return -EIO;
1504 dcp = (struct dmapctl *) mp->data;
1505 budmin = dcp->budmin;
1506
1507 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
1508 jfs_error(bmp->db_ipbmap->i_sb,
1509 "dbAllocAG: Corrupt dmapctl page");
1510 release_metapage(mp);
1511 return -EIO;
1512 }
1513
1514 /* search the subtree(s) of the dmap control page that describes
1515 * the allocation group, looking for sufficient free space. to begin,
1516 * determine how many allocation groups are represented in a dmap
1517 * control page at the control page level (i.e. L0, L1, L2) that
1518 * fully describes an allocation group. next, determine the starting
1519 * tree index of this allocation group within the control page.
1520 */
1521 agperlev =
1522 (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
1523 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
1524
1525 /* dmap control page trees fan-out by 4 and a single allocation
1526 * group may be described by 1 or 2 subtrees within the ag level
1527 * dmap control page, depending upon the ag size. examine the ag's
1528 * subtrees for sufficient free space, starting with the leftmost
1529 * subtree.
1530 */
1531 for (i = 0; i < bmp->db_agwidth; i++, ti++) {
1532 /* is there sufficient free space ?
1533 */
1534 if (l2nb > dcp->stree[ti])
1535 continue;
1536
1537 /* sufficient free space found in a subtree. now search down
1538 * the subtree to find the leftmost leaf that describes this
1539 * free space.
1540 */
1541 for (k = bmp->db_agheigth; k > 0; k--) {
1542 for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
1543 if (l2nb <= dcp->stree[m + n]) {
1544 ti = m + n;
1545 break;
1546 }
1547 }
1548 if (n == 4) {
1549 jfs_error(bmp->db_ipbmap->i_sb,
1550 "dbAllocAG: failed descending stree");
1551 release_metapage(mp);
1552 return -EIO;
1553 }
1554 }
1555
1556 /* determine the block number within the file system
1557 * that corresponds to this leaf.
1558 */
1559 if (bmp->db_aglevel == 2)
1560 blkno = 0;
1561 else if (bmp->db_aglevel == 1)
1562 blkno &= ~(MAXL1SIZE - 1);
1563 else /* bmp->db_aglevel == 0 */
1564 blkno &= ~(MAXL0SIZE - 1);
1565
1566 blkno +=
1567 ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;
1568
1569 /* release the buffer in preparation for going down
1570 * the next level of dmap control pages.
1571 */
1572 release_metapage(mp);
1573
1574 /* check if we need to continue to search down the lower
1575 * level dmap control pages. we need to if the number of
1576 * blocks required is less than maximum number of blocks
1577 * described at the next lower level.
1578 */
1579 if (l2nb < budmin) {
1580
1581 /* search the lower level dmap control pages to get
1582 * the starting block number of the the dmap that
1583 * contains or starts off the free space.
1584 */
1585 if ((rc =
1586 dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
1587 &blkno))) {
1588 if (rc == -ENOSPC) {
1589 jfs_error(bmp->db_ipbmap->i_sb,
1590 "dbAllocAG: control page "
1591 "inconsistent");
1592 return -EIO;
1593 }
1594 return (rc);
1595 }
1596 }
1597
1598 /* allocate the blocks.
1599 */
1600 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1601 if (rc == -ENOSPC) {
1602 jfs_error(bmp->db_ipbmap->i_sb,
1603 "dbAllocAG: unable to allocate blocks");
1604 rc = -EIO;
1605 }
1606 return (rc);
1607 }
1608
1609 /* no space in the allocation group. release the buffer and
1610 * return -ENOSPC.
1611 */
1612 release_metapage(mp);
1613
1614 return -ENOSPC;
1615 }
1616
1617
1618 /*
1619 * NAME: dbAllocAny()
1620 *
1621 * FUNCTION: attempt to allocate the specified number of contiguous
1622 * free blocks anywhere in the file system.
1623 *
1624 * dbAllocAny() attempts to find the sufficient free space by
1625 * searching down the dmap control pages, starting with the
1626 * highest level (i.e. L0, L1, L2) control page. if free space
1627 * large enough to satisfy the desired free space is found, the
1628 * desired free space is allocated.
1629 *
1630 * PARAMETERS:
1631 * bmp - pointer to bmap descriptor
1632 * nblocks - actual number of contiguous free blocks desired.
1633 * l2nb - log2 number of contiguous free blocks desired.
1634 * results - on successful return, set to the starting block number
1635 * of the newly allocated range.
1636 *
1637 * RETURN VALUES:
1638 * 0 - success
1639 * -ENOSPC - insufficient disk resources
1640 * -EIO - i/o error
1641 *
1642 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1643 */
dbAllocAny(struct bmap * bmp,s64 nblocks,int l2nb,s64 * results)1644 static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1645 {
1646 int rc;
1647 s64 blkno = 0;
1648
1649 /* starting with the top level dmap control page, search
1650 * down the dmap control levels for sufficient free space.
1651 * if free space is found, dbFindCtl() returns the starting
1652 * block number of the dmap that contains or starts off the
1653 * range of free space.
1654 */
1655 if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
1656 return (rc);
1657
1658 /* allocate the blocks.
1659 */
1660 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1661 if (rc == -ENOSPC) {
1662 jfs_error(bmp->db_ipbmap->i_sb,
1663 "dbAllocAny: unable to allocate blocks");
1664 return -EIO;
1665 }
1666 return (rc);
1667 }
1668
1669
1670 /*
1671 * NAME: dbFindCtl()
1672 *
1673 * FUNCTION: starting at a specified dmap control page level and block
1674 * number, search down the dmap control levels for a range of
1675 * contiguous free blocks large enough to satisfy an allocation
1676 * request for the specified number of free blocks.
1677 *
1678 * if sufficient contiguous free blocks are found, this routine
1679 * returns the starting block number within a dmap page that
1680 * contains or starts a range of contiqious free blocks that
1681 * is sufficient in size.
1682 *
1683 * PARAMETERS:
1684 * bmp - pointer to bmap descriptor
1685 * level - starting dmap control page level.
1686 * l2nb - log2 number of contiguous free blocks desired.
1687 * *blkno - on entry, starting block number for conducting the search.
1688 * on successful return, the first block within a dmap page
1689 * that contains or starts a range of contiguous free blocks.
1690 *
1691 * RETURN VALUES:
1692 * 0 - success
1693 * -ENOSPC - insufficient disk resources
1694 * -EIO - i/o error
1695 *
1696 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1697 */
dbFindCtl(struct bmap * bmp,int l2nb,int level,s64 * blkno)1698 static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1699 {
1700 int rc, leafidx, lev;
1701 s64 b, lblkno;
1702 struct dmapctl *dcp;
1703 int budmin;
1704 struct metapage *mp;
1705
1706 /* starting at the specified dmap control page level and block
1707 * number, search down the dmap control levels for the starting
1708 * block number of a dmap page that contains or starts off
1709 * sufficient free blocks.
1710 */
1711 for (lev = level, b = *blkno; lev >= 0; lev--) {
1712 /* get the buffer of the dmap control page for the block
1713 * number and level (i.e. L0, L1, L2).
1714 */
1715 lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
1716 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1717 if (mp == NULL)
1718 return -EIO;
1719 dcp = (struct dmapctl *) mp->data;
1720 budmin = dcp->budmin;
1721
1722 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
1723 jfs_error(bmp->db_ipbmap->i_sb,
1724 "dbFindCtl: Corrupt dmapctl page");
1725 release_metapage(mp);
1726 return -EIO;
1727 }
1728
1729 /* search the tree within the dmap control page for
1730 * sufficent free space. if sufficient free space is found,
1731 * dbFindLeaf() returns the index of the leaf at which
1732 * free space was found.
1733 */
1734 rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);
1735
1736 /* release the buffer.
1737 */
1738 release_metapage(mp);
1739
1740 /* space found ?
1741 */
1742 if (rc) {
1743 if (lev != level) {
1744 jfs_error(bmp->db_ipbmap->i_sb,
1745 "dbFindCtl: dmap inconsistent");
1746 return -EIO;
1747 }
1748 return -ENOSPC;
1749 }
1750
1751 /* adjust the block number to reflect the location within
1752 * the dmap control page (i.e. the leaf) at which free
1753 * space was found.
1754 */
1755 b += (((s64) leafidx) << budmin);
1756
1757 /* we stop the search at this dmap control page level if
1758 * the number of blocks required is greater than or equal
1759 * to the maximum number of blocks described at the next
1760 * (lower) level.
1761 */
1762 if (l2nb >= budmin)
1763 break;
1764 }
1765
1766 *blkno = b;
1767 return (0);
1768 }
1769
1770
1771 /*
1772 * NAME: dbAllocCtl()
1773 *
1774 * FUNCTION: attempt to allocate a specified number of contiguous
1775 * blocks starting within a specific dmap.
1776 *
1777 * this routine is called by higher level routines that search
1778 * the dmap control pages above the actual dmaps for contiguous
1779 * free space. the result of successful searches by these
1780 * routines are the starting block numbers within dmaps, with
1781 * the dmaps themselves containing the desired contiguous free
1782 * space or starting a contiguous free space of desired size
1783 * that is made up of the blocks of one or more dmaps. these
1784 * calls should not fail due to insufficent resources.
1785 *
1786 * this routine is called in some cases where it is not known
1787 * whether it will fail due to insufficient resources. more
1788 * specifically, this occurs when allocating from an allocation
1789 * group whose size is equal to the number of blocks per dmap.
1790 * in this case, the dmap control pages are not examined prior
1791 * to calling this routine (to save pathlength) and the call
1792 * might fail.
1793 *
1794 * for a request size that fits within a dmap, this routine relies
1795 * upon the dmap's dmtree to find the requested contiguous free
1796 * space. for request sizes that are larger than a dmap, the
1797 * requested free space will start at the first block of the
1798 * first dmap (i.e. blkno).
1799 *
1800 * PARAMETERS:
1801 * bmp - pointer to bmap descriptor
1802 * nblocks - actual number of contiguous free blocks to allocate.
1803 * l2nb - log2 number of contiguous free blocks to allocate.
1804 * blkno - starting block number of the dmap to start the allocation
1805 * from.
1806 * results - on successful return, set to the starting block number
1807 * of the newly allocated range.
1808 *
1809 * RETURN VALUES:
1810 * 0 - success
1811 * -ENOSPC - insufficient disk resources
1812 * -EIO - i/o error
1813 *
1814 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1815 */
1816 static int
dbAllocCtl(struct bmap * bmp,s64 nblocks,int l2nb,s64 blkno,s64 * results)1817 dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1818 {
1819 int rc, nb;
1820 s64 b, lblkno, n;
1821 struct metapage *mp;
1822 struct dmap *dp;
1823
1824 /* check if the allocation request is confined to a single dmap.
1825 */
1826 if (l2nb <= L2BPERDMAP) {
1827 /* get the buffer for the dmap.
1828 */
1829 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
1830 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1831 if (mp == NULL)
1832 return -EIO;
1833 dp = (struct dmap *) mp->data;
1834
1835 /* try to allocate the blocks.
1836 */
1837 rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
1838 if (rc == 0)
1839 mark_metapage_dirty(mp);
1840
1841 release_metapage(mp);
1842
1843 return (rc);
1844 }
1845
1846 /* allocation request involving multiple dmaps. it must start on
1847 * a dmap boundary.
1848 */
1849 assert((blkno & (BPERDMAP - 1)) == 0);
1850
1851 /* allocate the blocks dmap by dmap.
1852 */
1853 for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
1854 /* get the buffer for the dmap.
1855 */
1856 lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
1857 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1858 if (mp == NULL) {
1859 rc = -EIO;
1860 goto backout;
1861 }
1862 dp = (struct dmap *) mp->data;
1863
1864 /* the dmap better be all free.
1865 */
1866 if (dp->tree.stree[ROOT] != L2BPERDMAP) {
1867 release_metapage(mp);
1868 jfs_error(bmp->db_ipbmap->i_sb,
1869 "dbAllocCtl: the dmap is not all free");
1870 rc = -EIO;
1871 goto backout;
1872 }
1873
1874 /* determine how many blocks to allocate from this dmap.
1875 */
1876 nb = min(n, (s64)BPERDMAP);
1877
1878 /* allocate the blocks from the dmap.
1879 */
1880 if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
1881 release_metapage(mp);
1882 goto backout;
1883 }
1884
1885 /* write the buffer.
1886 */
1887 write_metapage(mp);
1888 }
1889
1890 /* set the results (starting block number) and return.
1891 */
1892 *results = blkno;
1893 return (0);
1894
1895 /* something failed in handling an allocation request involving
1896 * multiple dmaps. we'll try to clean up by backing out any
1897 * allocation that has already happened for this request. if
1898 * we fail in backing out the allocation, we'll mark the file
1899 * system to indicate that blocks have been leaked.
1900 */
1901 backout:
1902
1903 /* try to backout the allocations dmap by dmap.
1904 */
1905 for (n = nblocks - n, b = blkno; n > 0;
1906 n -= BPERDMAP, b += BPERDMAP) {
1907 /* get the buffer for this dmap.
1908 */
1909 lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
1910 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1911 if (mp == NULL) {
1912 /* could not back out. mark the file system
1913 * to indicate that we have leaked blocks.
1914 */
1915 jfs_error(bmp->db_ipbmap->i_sb,
1916 "dbAllocCtl: I/O Error: Block Leakage.");
1917 continue;
1918 }
1919 dp = (struct dmap *) mp->data;
1920
1921 /* free the blocks is this dmap.
1922 */
1923 if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
1924 /* could not back out. mark the file system
1925 * to indicate that we have leaked blocks.
1926 */
1927 release_metapage(mp);
1928 jfs_error(bmp->db_ipbmap->i_sb,
1929 "dbAllocCtl: Block Leakage.");
1930 continue;
1931 }
1932
1933 /* write the buffer.
1934 */
1935 write_metapage(mp);
1936 }
1937
1938 return (rc);
1939 }
1940
1941
1942 /*
1943 * NAME: dbAllocDmapLev()
1944 *
1945 * FUNCTION: attempt to allocate a specified number of contiguous blocks
1946 * from a specified dmap.
1947 *
1948 * this routine checks if the contiguous blocks are available.
1949 * if so, nblocks of blocks are allocated; otherwise, ENOSPC is
1950 * returned.
1951 *
1952 * PARAMETERS:
1953 * mp - pointer to bmap descriptor
1954 * dp - pointer to dmap to attempt to allocate blocks from.
1955 * l2nb - log2 number of contiguous block desired.
1956 * nblocks - actual number of contiguous block desired.
1957 * results - on successful return, set to the starting block number
1958 * of the newly allocated range.
1959 *
1960 * RETURN VALUES:
1961 * 0 - success
1962 * -ENOSPC - insufficient disk resources
1963 * -EIO - i/o error
1964 *
1965 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
1966 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
1967 */
1968 static int
dbAllocDmapLev(struct bmap * bmp,struct dmap * dp,int nblocks,int l2nb,s64 * results)1969 dbAllocDmapLev(struct bmap * bmp,
1970 struct dmap * dp, int nblocks, int l2nb, s64 * results)
1971 {
1972 s64 blkno;
1973 int leafidx, rc;
1974
1975 /* can't be more than a dmaps worth of blocks */
1976 assert(l2nb <= L2BPERDMAP);
1977
1978 /* search the tree within the dmap page for sufficient
1979 * free space. if sufficient free space is found, dbFindLeaf()
1980 * returns the index of the leaf at which free space was found.
1981 */
1982 if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
1983 return -ENOSPC;
1984
1985 /* determine the block number within the file system corresponding
1986 * to the leaf at which free space was found.
1987 */
1988 blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
1989
1990 /* if not all bits of the dmap word are free, get the starting
1991 * bit number within the dmap word of the required string of free
1992 * bits and adjust the block number with this value.
1993 */
1994 if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
1995 blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
1996
1997 /* allocate the blocks */
1998 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
1999 *results = blkno;
2000
2001 return (rc);
2002 }
2003
2004
2005 /*
2006 * NAME: dbAllocDmap()
2007 *
2008 * FUNCTION: adjust the disk allocation map to reflect the allocation
2009 * of a specified block range within a dmap.
2010 *
2011 * this routine allocates the specified blocks from the dmap
2012 * through a call to dbAllocBits(). if the allocation of the
2013 * block range causes the maximum string of free blocks within
2014 * the dmap to change (i.e. the value of the root of the dmap's
2015 * dmtree), this routine will cause this change to be reflected
2016 * up through the appropriate levels of the dmap control pages
2017 * by a call to dbAdjCtl() for the L0 dmap control page that
2018 * covers this dmap.
2019 *
2020 * PARAMETERS:
2021 * bmp - pointer to bmap descriptor
2022 * dp - pointer to dmap to allocate the block range from.
2023 * blkno - starting block number of the block to be allocated.
2024 * nblocks - number of blocks to be allocated.
2025 *
2026 * RETURN VALUES:
2027 * 0 - success
2028 * -EIO - i/o error
2029 *
2030 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2031 */
dbAllocDmap(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)2032 static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2033 int nblocks)
2034 {
2035 s8 oldroot;
2036 int rc;
2037
2038 /* save the current value of the root (i.e. maximum free string)
2039 * of the dmap tree.
2040 */
2041 oldroot = dp->tree.stree[ROOT];
2042
2043 /* allocate the specified (blocks) bits */
2044 dbAllocBits(bmp, dp, blkno, nblocks);
2045
2046 /* if the root has not changed, done. */
2047 if (dp->tree.stree[ROOT] == oldroot)
2048 return (0);
2049
2050 /* root changed. bubble the change up to the dmap control pages.
2051 * if the adjustment of the upper level control pages fails,
2052 * backout the bit allocation (thus making everything consistent).
2053 */
2054 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
2055 dbFreeBits(bmp, dp, blkno, nblocks);
2056
2057 return (rc);
2058 }
2059
2060
2061 /*
2062 * NAME: dbFreeDmap()
2063 *
2064 * FUNCTION: adjust the disk allocation map to reflect the allocation
2065 * of a specified block range within a dmap.
2066 *
2067 * this routine frees the specified blocks from the dmap through
2068 * a call to dbFreeBits(). if the deallocation of the block range
2069 * causes the maximum string of free blocks within the dmap to
2070 * change (i.e. the value of the root of the dmap's dmtree), this
2071 * routine will cause this change to be reflected up through the
2072 * appropriate levels of the dmap control pages by a call to
2073 * dbAdjCtl() for the L0 dmap control page that covers this dmap.
2074 *
2075 * PARAMETERS:
2076 * bmp - pointer to bmap descriptor
2077 * dp - pointer to dmap to free the block range from.
2078 * blkno - starting block number of the block to be freed.
2079 * nblocks - number of blocks to be freed.
2080 *
2081 * RETURN VALUES:
2082 * 0 - success
2083 * -EIO - i/o error
2084 *
2085 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2086 */
dbFreeDmap(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)2087 static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2088 int nblocks)
2089 {
2090 s8 oldroot;
2091 int rc, word;
2092
2093 /* save the current value of the root (i.e. maximum free string)
2094 * of the dmap tree.
2095 */
2096 oldroot = dp->tree.stree[ROOT];
2097
2098 /* free the specified (blocks) bits */
2099 dbFreeBits(bmp, dp, blkno, nblocks);
2100
2101 /* if the root has not changed, done. */
2102 if (dp->tree.stree[ROOT] == oldroot)
2103 return (0);
2104
2105 /* root changed. bubble the change up to the dmap control pages.
2106 * if the adjustment of the upper level control pages fails,
2107 * backout the deallocation.
2108 */
2109 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) {
2110 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
2111
2112 /* as part of backing out the deallocation, we will have
2113 * to back split the dmap tree if the deallocation caused
2114 * the freed blocks to become part of a larger binary buddy
2115 * system.
2116 */
2117 if (dp->tree.stree[word] == NOFREE)
2118 dbBackSplit((dmtree_t *) & dp->tree, word);
2119
2120 dbAllocBits(bmp, dp, blkno, nblocks);
2121 }
2122
2123 return (rc);
2124 }
2125
2126
2127 /*
2128 * NAME: dbAllocBits()
2129 *
2130 * FUNCTION: allocate a specified block range from a dmap.
2131 *
2132 * this routine updates the dmap to reflect the working
2133 * state allocation of the specified block range. it directly
2134 * updates the bits of the working map and causes the adjustment
2135 * of the binary buddy system described by the dmap's dmtree
2136 * leaves to reflect the bits allocated. it also causes the
2137 * dmap's dmtree, as a whole, to reflect the allocated range.
2138 *
2139 * PARAMETERS:
2140 * bmp - pointer to bmap descriptor
2141 * dp - pointer to dmap to allocate bits from.
2142 * blkno - starting block number of the bits to be allocated.
2143 * nblocks - number of bits to be allocated.
2144 *
2145 * RETURN VALUES: none
2146 *
2147 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2148 */
dbAllocBits(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)2149 static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2150 int nblocks)
2151 {
2152 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2153 dmtree_t *tp = (dmtree_t *) & dp->tree;
2154 int size;
2155 s8 *leaf;
2156
2157 /* pick up a pointer to the leaves of the dmap tree */
2158 leaf = dp->tree.stree + LEAFIND;
2159
2160 /* determine the bit number and word within the dmap of the
2161 * starting block.
2162 */
2163 dbitno = blkno & (BPERDMAP - 1);
2164 word = dbitno >> L2DBWORD;
2165
2166 /* block range better be within the dmap */
2167 assert(dbitno + nblocks <= BPERDMAP);
2168
2169 /* allocate the bits of the dmap's words corresponding to the block
2170 * range. not all bits of the first and last words may be contained
2171 * within the block range. if this is the case, we'll work against
2172 * those words (i.e. partial first and/or last) on an individual basis
2173 * (a single pass), allocating the bits of interest by hand and
2174 * updating the leaf corresponding to the dmap word. a single pass
2175 * will be used for all dmap words fully contained within the
2176 * specified range. within this pass, the bits of all fully contained
2177 * dmap words will be marked as free in a single shot and the leaves
2178 * will be updated. a single leaf may describe the free space of
2179 * multiple dmap words, so we may update only a subset of the actual
2180 * leaves corresponding to the dmap words of the block range.
2181 */
2182 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2183 /* determine the bit number within the word and
2184 * the number of bits within the word.
2185 */
2186 wbitno = dbitno & (DBWORD - 1);
2187 nb = min(rembits, DBWORD - wbitno);
2188
2189 /* check if only part of a word is to be allocated.
2190 */
2191 if (nb < DBWORD) {
2192 /* allocate (set to 1) the appropriate bits within
2193 * this dmap word.
2194 */
2195 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
2196 >> wbitno);
2197
2198 /* update the leaf for this dmap word. in addition
2199 * to setting the leaf value to the binary buddy max
2200 * of the updated dmap word, dbSplit() will split
2201 * the binary system of the leaves if need be.
2202 */
2203 dbSplit(tp, word, BUDMIN,
2204 dbMaxBud((u8 *) & dp->wmap[word]));
2205
2206 word += 1;
2207 } else {
2208 /* one or more dmap words are fully contained
2209 * within the block range. determine how many
2210 * words and allocate (set to 1) the bits of these
2211 * words.
2212 */
2213 nwords = rembits >> L2DBWORD;
2214 memset(&dp->wmap[word], (int) ONES, nwords * 4);
2215
2216 /* determine how many bits.
2217 */
2218 nb = nwords << L2DBWORD;
2219
2220 /* now update the appropriate leaves to reflect
2221 * the allocated words.
2222 */
2223 for (; nwords > 0; nwords -= nw) {
2224 if (leaf[word] < BUDMIN) {
2225 jfs_error(bmp->db_ipbmap->i_sb,
2226 "dbAllocBits: leaf page "
2227 "corrupt");
2228 break;
2229 }
2230
2231 /* determine what the leaf value should be
2232 * updated to as the minimum of the l2 number
2233 * of bits being allocated and the l2 number
2234 * of bits currently described by this leaf.
2235 */
2236 size = min((int)leaf[word], NLSTOL2BSZ(nwords));
2237
2238 /* update the leaf to reflect the allocation.
2239 * in addition to setting the leaf value to
2240 * NOFREE, dbSplit() will split the binary
2241 * system of the leaves to reflect the current
2242 * allocation (size).
2243 */
2244 dbSplit(tp, word, size, NOFREE);
2245
2246 /* get the number of dmap words handled */
2247 nw = BUDSIZE(size, BUDMIN);
2248 word += nw;
2249 }
2250 }
2251 }
2252
2253 /* update the free count for this dmap */
2254 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
2255
2256 BMAP_LOCK(bmp);
2257
2258 /* if this allocation group is completely free,
2259 * update the maximum allocation group number if this allocation
2260 * group is the new max.
2261 */
2262 agno = blkno >> bmp->db_agl2size;
2263 if (agno > bmp->db_maxag)
2264 bmp->db_maxag = agno;
2265
2266 /* update the free count for the allocation group and map */
2267 bmp->db_agfree[agno] -= nblocks;
2268 bmp->db_nfree -= nblocks;
2269
2270 BMAP_UNLOCK(bmp);
2271 }
2272
2273
2274 /*
2275 * NAME: dbFreeBits()
2276 *
2277 * FUNCTION: free a specified block range from a dmap.
2278 *
2279 * this routine updates the dmap to reflect the working
2280 * state allocation of the specified block range. it directly
2281 * updates the bits of the working map and causes the adjustment
2282 * of the binary buddy system described by the dmap's dmtree
2283 * leaves to reflect the bits freed. it also causes the dmap's
2284 * dmtree, as a whole, to reflect the deallocated range.
2285 *
2286 * PARAMETERS:
2287 * bmp - pointer to bmap descriptor
2288 * dp - pointer to dmap to free bits from.
2289 * blkno - starting block number of the bits to be freed.
2290 * nblocks - number of bits to be freed.
2291 *
2292 * RETURN VALUES: none
2293 *
2294 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2295 */
dbFreeBits(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)2296 static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2297 int nblocks)
2298 {
2299 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2300 dmtree_t *tp = (dmtree_t *) & dp->tree;
2301 int size;
2302
2303 /* determine the bit number and word within the dmap of the
2304 * starting block.
2305 */
2306 dbitno = blkno & (BPERDMAP - 1);
2307 word = dbitno >> L2DBWORD;
2308
2309 /* block range better be within the dmap.
2310 */
2311 assert(dbitno + nblocks <= BPERDMAP);
2312
2313 /* free the bits of the dmaps words corresponding to the block range.
2314 * not all bits of the first and last words may be contained within
2315 * the block range. if this is the case, we'll work against those
2316 * words (i.e. partial first and/or last) on an individual basis
2317 * (a single pass), freeing the bits of interest by hand and updating
2318 * the leaf corresponding to the dmap word. a single pass will be used
2319 * for all dmap words fully contained within the specified range.
2320 * within this pass, the bits of all fully contained dmap words will
2321 * be marked as free in a single shot and the leaves will be updated. a
2322 * single leaf may describe the free space of multiple dmap words,
2323 * so we may update only a subset of the actual leaves corresponding
2324 * to the dmap words of the block range.
2325 *
2326 * dbJoin() is used to update leaf values and will join the binary
2327 * buddy system of the leaves if the new leaf values indicate this
2328 * should be done.
2329 */
2330 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2331 /* determine the bit number within the word and
2332 * the number of bits within the word.
2333 */
2334 wbitno = dbitno & (DBWORD - 1);
2335 nb = min(rembits, DBWORD - wbitno);
2336
2337 /* check if only part of a word is to be freed.
2338 */
2339 if (nb < DBWORD) {
2340 /* free (zero) the appropriate bits within this
2341 * dmap word.
2342 */
2343 dp->wmap[word] &=
2344 cpu_to_le32(~(ONES << (DBWORD - nb)
2345 >> wbitno));
2346
2347 /* update the leaf for this dmap word.
2348 */
2349 dbJoin(tp, word,
2350 dbMaxBud((u8 *) & dp->wmap[word]));
2351
2352 word += 1;
2353 } else {
2354 /* one or more dmap words are fully contained
2355 * within the block range. determine how many
2356 * words and free (zero) the bits of these words.
2357 */
2358 nwords = rembits >> L2DBWORD;
2359 memset(&dp->wmap[word], 0, nwords * 4);
2360
2361 /* determine how many bits.
2362 */
2363 nb = nwords << L2DBWORD;
2364
2365 /* now update the appropriate leaves to reflect
2366 * the freed words.
2367 */
2368 for (; nwords > 0; nwords -= nw) {
2369 /* determine what the leaf value should be
2370 * updated to as the minimum of the l2 number
2371 * of bits being freed and the l2 (max) number
2372 * of bits that can be described by this leaf.
2373 */
2374 size =
2375 min(LITOL2BSZ
2376 (word, L2LPERDMAP, BUDMIN),
2377 NLSTOL2BSZ(nwords));
2378
2379 /* update the leaf.
2380 */
2381 dbJoin(tp, word, size);
2382
2383 /* get the number of dmap words handled.
2384 */
2385 nw = BUDSIZE(size, BUDMIN);
2386 word += nw;
2387 }
2388 }
2389 }
2390
2391 /* update the free count for this dmap.
2392 */
2393 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
2394
2395 BMAP_LOCK(bmp);
2396
2397 /* update the free count for the allocation group and
2398 * map.
2399 */
2400 agno = blkno >> bmp->db_agl2size;
2401 bmp->db_nfree += nblocks;
2402 bmp->db_agfree[agno] += nblocks;
2403
2404 /* check if this allocation group is not completely free and
2405 * if it is currently the maximum (rightmost) allocation group.
2406 * if so, establish the new maximum allocation group number by
2407 * searching left for the first allocation group with allocation.
2408 */
2409 if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
2410 (agno == bmp->db_numag - 1 &&
2411 bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) {
2412 while (bmp->db_maxag > 0) {
2413 bmp->db_maxag -= 1;
2414 if (bmp->db_agfree[bmp->db_maxag] !=
2415 bmp->db_agsize)
2416 break;
2417 }
2418
2419 /* re-establish the allocation group preference if the
2420 * current preference is right of the maximum allocation
2421 * group.
2422 */
2423 if (bmp->db_agpref > bmp->db_maxag)
2424 bmp->db_agpref = bmp->db_maxag;
2425 }
2426
2427 BMAP_UNLOCK(bmp);
2428 }
2429
2430
2431 /*
2432 * NAME: dbAdjCtl()
2433 *
2434 * FUNCTION: adjust a dmap control page at a specified level to reflect
2435 * the change in a lower level dmap or dmap control page's
2436 * maximum string of free blocks (i.e. a change in the root
2437 * of the lower level object's dmtree) due to the allocation
2438 * or deallocation of a range of blocks with a single dmap.
2439 *
2440 * on entry, this routine is provided with the new value of
2441 * the lower level dmap or dmap control page root and the
2442 * starting block number of the block range whose allocation
2443 * or deallocation resulted in the root change. this range
2444 * is respresented by a single leaf of the current dmapctl
2445 * and the leaf will be updated with this value, possibly
2446 * causing a binary buddy system within the leaves to be
2447 * split or joined. the update may also cause the dmapctl's
2448 * dmtree to be updated.
2449 *
2450 * if the adjustment of the dmap control page, itself, causes its
2451 * root to change, this change will be bubbled up to the next dmap
2452 * control level by a recursive call to this routine, specifying
2453 * the new root value and the next dmap control page level to
2454 * be adjusted.
2455 * PARAMETERS:
2456 * bmp - pointer to bmap descriptor
2457 * blkno - the first block of a block range within a dmap. it is
2458 * the allocation or deallocation of this block range that
2459 * requires the dmap control page to be adjusted.
2460 * newval - the new value of the lower level dmap or dmap control
2461 * page root.
2462 * alloc - TRUE if adjustment is due to an allocation.
2463 * level - current level of dmap control page (i.e. L0, L1, L2) to
2464 * be adjusted.
2465 *
2466 * RETURN VALUES:
2467 * 0 - success
2468 * -EIO - i/o error
2469 *
2470 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2471 */
2472 static int
dbAdjCtl(struct bmap * bmp,s64 blkno,int newval,int alloc,int level)2473 dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2474 {
2475 struct metapage *mp;
2476 s8 oldroot;
2477 int oldval;
2478 s64 lblkno;
2479 struct dmapctl *dcp;
2480 int rc, leafno, ti;
2481
2482 /* get the buffer for the dmap control page for the specified
2483 * block number and control page level.
2484 */
2485 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
2486 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
2487 if (mp == NULL)
2488 return -EIO;
2489 dcp = (struct dmapctl *) mp->data;
2490
2491 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
2492 jfs_error(bmp->db_ipbmap->i_sb,
2493 "dbAdjCtl: Corrupt dmapctl page");
2494 release_metapage(mp);
2495 return -EIO;
2496 }
2497
2498 /* determine the leaf number corresponding to the block and
2499 * the index within the dmap control tree.
2500 */
2501 leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
2502 ti = leafno + le32_to_cpu(dcp->leafidx);
2503
2504 /* save the current leaf value and the current root level (i.e.
2505 * maximum l2 free string described by this dmapctl).
2506 */
2507 oldval = dcp->stree[ti];
2508 oldroot = dcp->stree[ROOT];
2509
2510 /* check if this is a control page update for an allocation.
2511 * if so, update the leaf to reflect the new leaf value using
2512 * dbSplit(); otherwise (deallocation), use dbJoin() to udpate
2513 * the leaf with the new value. in addition to updating the
2514 * leaf, dbSplit() will also split the binary buddy system of
2515 * the leaves, if required, and bubble new values within the
2516 * dmapctl tree, if required. similarly, dbJoin() will join
2517 * the binary buddy system of leaves and bubble new values up
2518 * the dmapctl tree as required by the new leaf value.
2519 */
2520 if (alloc) {
2521 /* check if we are in the middle of a binary buddy
2522 * system. this happens when we are performing the
2523 * first allocation out of an allocation group that
2524 * is part (not the first part) of a larger binary
2525 * buddy system. if we are in the middle, back split
2526 * the system prior to calling dbSplit() which assumes
2527 * that it is at the front of a binary buddy system.
2528 */
2529 if (oldval == NOFREE) {
2530 dbBackSplit((dmtree_t *) dcp, leafno);
2531 oldval = dcp->stree[ti];
2532 }
2533 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
2534 } else {
2535 dbJoin((dmtree_t *) dcp, leafno, newval);
2536 }
2537
2538 /* check if the root of the current dmap control page changed due
2539 * to the update and if the current dmap control page is not at
2540 * the current top level (i.e. L0, L1, L2) of the map. if so (i.e.
2541 * root changed and this is not the top level), call this routine
2542 * again (recursion) for the next higher level of the mapping to
2543 * reflect the change in root for the current dmap control page.
2544 */
2545 if (dcp->stree[ROOT] != oldroot) {
2546 /* are we below the top level of the map. if so,
2547 * bubble the root up to the next higher level.
2548 */
2549 if (level < bmp->db_maxlevel) {
2550 /* bubble up the new root of this dmap control page to
2551 * the next level.
2552 */
2553 if ((rc =
2554 dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc,
2555 level + 1))) {
2556 /* something went wrong in bubbling up the new
2557 * root value, so backout the changes to the
2558 * current dmap control page.
2559 */
2560 if (alloc) {
2561 dbJoin((dmtree_t *) dcp, leafno,
2562 oldval);
2563 } else {
2564 /* the dbJoin() above might have
2565 * caused a larger binary buddy system
2566 * to form and we may now be in the
2567 * middle of it. if this is the case,
2568 * back split the buddies.
2569 */
2570 if (dcp->stree[ti] == NOFREE)
2571 dbBackSplit((dmtree_t *)
2572 dcp, leafno);
2573 dbSplit((dmtree_t *) dcp, leafno,
2574 dcp->budmin, oldval);
2575 }
2576
2577 /* release the buffer and return the error.
2578 */
2579 release_metapage(mp);
2580 return (rc);
2581 }
2582 } else {
2583 /* we're at the top level of the map. update
2584 * the bmap control page to reflect the size
2585 * of the maximum free buddy system.
2586 */
2587 assert(level == bmp->db_maxlevel);
2588 if (bmp->db_maxfreebud != oldroot) {
2589 jfs_error(bmp->db_ipbmap->i_sb,
2590 "dbAdjCtl: the maximum free buddy is "
2591 "not the old root");
2592 }
2593 bmp->db_maxfreebud = dcp->stree[ROOT];
2594 }
2595 }
2596
2597 /* write the buffer.
2598 */
2599 write_metapage(mp);
2600
2601 return (0);
2602 }
2603
2604
2605 /*
2606 * NAME: dbSplit()
2607 *
2608 * FUNCTION: update the leaf of a dmtree with a new value, splitting
2609 * the leaf from the binary buddy system of the dmtree's
2610 * leaves, as required.
2611 *
2612 * PARAMETERS:
2613 * tp - pointer to the tree containing the leaf.
2614 * leafno - the number of the leaf to be updated.
2615 * splitsz - the size the binary buddy system starting at the leaf
2616 * must be split to, specified as the log2 number of blocks.
2617 * newval - the new value for the leaf.
2618 *
2619 * RETURN VALUES: none
2620 *
2621 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2622 */
dbSplit(dmtree_t * tp,int leafno,int splitsz,int newval)2623 static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2624 {
2625 int budsz;
2626 int cursz;
2627 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2628
2629 /* check if the leaf needs to be split.
2630 */
2631 if (leaf[leafno] > tp->dmt_budmin) {
2632 /* the split occurs by cutting the buddy system in half
2633 * at the specified leaf until we reach the specified
2634 * size. pick up the starting split size (current size
2635 * - 1 in l2) and the corresponding buddy size.
2636 */
2637 cursz = leaf[leafno] - 1;
2638 budsz = BUDSIZE(cursz, tp->dmt_budmin);
2639
2640 /* split until we reach the specified size.
2641 */
2642 while (cursz >= splitsz) {
2643 /* update the buddy's leaf with its new value.
2644 */
2645 dbAdjTree(tp, leafno ^ budsz, cursz);
2646
2647 /* on to the next size and buddy.
2648 */
2649 cursz -= 1;
2650 budsz >>= 1;
2651 }
2652 }
2653
2654 /* adjust the dmap tree to reflect the specified leaf's new
2655 * value.
2656 */
2657 dbAdjTree(tp, leafno, newval);
2658 }
2659
2660
2661 /*
2662 * NAME: dbBackSplit()
2663 *
2664 * FUNCTION: back split the binary buddy system of dmtree leaves
2665 * that hold a specified leaf until the specified leaf
2666 * starts its own binary buddy system.
2667 *
2668 * the allocators typically perform allocations at the start
2669 * of binary buddy systems and dbSplit() is used to accomplish
2670 * any required splits. in some cases, however, allocation
2671 * may occur in the middle of a binary system and requires a
2672 * back split, with the split proceeding out from the middle of
2673 * the system (less efficient) rather than the start of the
2674 * system (more efficient). the cases in which a back split
2675 * is required are rare and are limited to the first allocation
2676 * within an allocation group which is a part (not first part)
2677 * of a larger binary buddy system and a few exception cases
2678 * in which a previous join operation must be backed out.
2679 *
2680 * PARAMETERS:
2681 * tp - pointer to the tree containing the leaf.
2682 * leafno - the number of the leaf to be updated.
2683 *
2684 * RETURN VALUES: none
2685 *
2686 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2687 */
dbBackSplit(dmtree_t * tp,int leafno)2688 static void dbBackSplit(dmtree_t * tp, int leafno)
2689 {
2690 int budsz, bud, w, bsz, size;
2691 int cursz;
2692 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2693
2694 /* leaf should be part (not first part) of a binary
2695 * buddy system.
2696 */
2697 assert(leaf[leafno] == NOFREE);
2698
2699 /* the back split is accomplished by iteratively finding the leaf
2700 * that starts the buddy system that contains the specified leaf and
2701 * splitting that system in two. this iteration continues until
2702 * the specified leaf becomes the start of a buddy system.
2703 *
2704 * determine maximum possible l2 size for the specified leaf.
2705 */
2706 size =
2707 LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs),
2708 tp->dmt_budmin);
2709
2710 /* determine the number of leaves covered by this size. this
2711 * is the buddy size that we will start with as we search for
2712 * the buddy system that contains the specified leaf.
2713 */
2714 budsz = BUDSIZE(size, tp->dmt_budmin);
2715
2716 /* back split.
2717 */
2718 while (leaf[leafno] == NOFREE) {
2719 /* find the leftmost buddy leaf.
2720 */
2721 for (w = leafno, bsz = budsz;; bsz <<= 1,
2722 w = (w < bud) ? w : bud) {
2723 assert(bsz < le32_to_cpu(tp->dmt_nleafs));
2724
2725 /* determine the buddy.
2726 */
2727 bud = w ^ bsz;
2728
2729 /* check if this buddy is the start of the system.
2730 */
2731 if (leaf[bud] != NOFREE) {
2732 /* split the leaf at the start of the
2733 * system in two.
2734 */
2735 cursz = leaf[bud] - 1;
2736 dbSplit(tp, bud, cursz, cursz);
2737 break;
2738 }
2739 }
2740 }
2741
2742 assert(leaf[leafno] == size);
2743 }
2744
2745
2746 /*
2747 * NAME: dbJoin()
2748 *
2749 * FUNCTION: update the leaf of a dmtree with a new value, joining
2750 * the leaf with other leaves of the dmtree into a multi-leaf
2751 * binary buddy system, as required.
2752 *
2753 * PARAMETERS:
2754 * tp - pointer to the tree containing the leaf.
2755 * leafno - the number of the leaf to be updated.
2756 * newval - the new value for the leaf.
2757 *
2758 * RETURN VALUES: none
2759 */
dbJoin(dmtree_t * tp,int leafno,int newval)2760 static void dbJoin(dmtree_t * tp, int leafno, int newval)
2761 {
2762 int budsz, buddy;
2763 s8 *leaf;
2764
2765 /* can the new leaf value require a join with other leaves ?
2766 */
2767 if (newval >= tp->dmt_budmin) {
2768 /* pickup a pointer to the leaves of the tree.
2769 */
2770 leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2771
2772 /* try to join the specified leaf into a large binary
2773 * buddy system. the join proceeds by attempting to join
2774 * the specified leafno with its buddy (leaf) at new value.
2775 * if the join occurs, we attempt to join the left leaf
2776 * of the joined buddies with its buddy at new value + 1.
2777 * we continue to join until we find a buddy that cannot be
2778 * joined (does not have a value equal to the size of the
2779 * last join) or until all leaves have been joined into a
2780 * single system.
2781 *
2782 * get the buddy size (number of words covered) of
2783 * the new value.
2784 */
2785 budsz = BUDSIZE(newval, tp->dmt_budmin);
2786
2787 /* try to join.
2788 */
2789 while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
2790 /* get the buddy leaf.
2791 */
2792 buddy = leafno ^ budsz;
2793
2794 /* if the leaf's new value is greater than its
2795 * buddy's value, we join no more.
2796 */
2797 if (newval > leaf[buddy])
2798 break;
2799
2800 assert(newval == leaf[buddy]);
2801
2802 /* check which (leafno or buddy) is the left buddy.
2803 * the left buddy gets to claim the blocks resulting
2804 * from the join while the right gets to claim none.
2805 * the left buddy is also eligable to participate in
2806 * a join at the next higher level while the right
2807 * is not.
2808 *
2809 */
2810 if (leafno < buddy) {
2811 /* leafno is the left buddy.
2812 */
2813 dbAdjTree(tp, buddy, NOFREE);
2814 } else {
2815 /* buddy is the left buddy and becomes
2816 * leafno.
2817 */
2818 dbAdjTree(tp, leafno, NOFREE);
2819 leafno = buddy;
2820 }
2821
2822 /* on to try the next join.
2823 */
2824 newval += 1;
2825 budsz <<= 1;
2826 }
2827 }
2828
2829 /* update the leaf value.
2830 */
2831 dbAdjTree(tp, leafno, newval);
2832 }
2833
2834
2835 /*
2836 * NAME: dbAdjTree()
2837 *
2838 * FUNCTION: update a leaf of a dmtree with a new value, adjusting
2839 * the dmtree, as required, to reflect the new leaf value.
2840 * the combination of any buddies must already be done before
2841 * this is called.
2842 *
2843 * PARAMETERS:
2844 * tp - pointer to the tree to be adjusted.
2845 * leafno - the number of the leaf to be updated.
2846 * newval - the new value for the leaf.
2847 *
2848 * RETURN VALUES: none
2849 */
dbAdjTree(dmtree_t * tp,int leafno,int newval)2850 static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2851 {
2852 int lp, pp, k;
2853 int max;
2854
2855 /* pick up the index of the leaf for this leafno.
2856 */
2857 lp = leafno + le32_to_cpu(tp->dmt_leafidx);
2858
2859 /* is the current value the same as the old value ? if so,
2860 * there is nothing to do.
2861 */
2862 if (tp->dmt_stree[lp] == newval)
2863 return;
2864
2865 /* set the new value.
2866 */
2867 tp->dmt_stree[lp] = newval;
2868
2869 /* bubble the new value up the tree as required.
2870 */
2871 for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) {
2872 /* get the index of the first leaf of the 4 leaf
2873 * group containing the specified leaf (leafno).
2874 */
2875 lp = ((lp - 1) & ~0x03) + 1;
2876
2877 /* get the index of the parent of this 4 leaf group.
2878 */
2879 pp = (lp - 1) >> 2;
2880
2881 /* determine the maximum of the 4 leaves.
2882 */
2883 max = TREEMAX(&tp->dmt_stree[lp]);
2884
2885 /* if the maximum of the 4 is the same as the
2886 * parent's value, we're done.
2887 */
2888 if (tp->dmt_stree[pp] == max)
2889 break;
2890
2891 /* parent gets new value.
2892 */
2893 tp->dmt_stree[pp] = max;
2894
2895 /* parent becomes leaf for next go-round.
2896 */
2897 lp = pp;
2898 }
2899 }
2900
2901
2902 /*
2903 * NAME: dbFindLeaf()
2904 *
2905 * FUNCTION: search a dmtree_t for sufficient free blocks, returning
2906 * the index of a leaf describing the free blocks if
2907 * sufficient free blocks are found.
2908 *
2909 * the search starts at the top of the dmtree_t tree and
2910 * proceeds down the tree to the leftmost leaf with sufficient
2911 * free space.
2912 *
2913 * PARAMETERS:
2914 * tp - pointer to the tree to be searched.
2915 * l2nb - log2 number of free blocks to search for.
2916 * leafidx - return pointer to be set to the index of the leaf
2917 * describing at least l2nb free blocks if sufficient
2918 * free blocks are found.
2919 *
2920 * RETURN VALUES:
2921 * 0 - success
2922 * -ENOSPC - insufficient free blocks.
2923 */
dbFindLeaf(dmtree_t * tp,int l2nb,int * leafidx)2924 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2925 {
2926 int ti, n = 0, k, x = 0;
2927
2928 /* first check the root of the tree to see if there is
2929 * sufficient free space.
2930 */
2931 if (l2nb > tp->dmt_stree[ROOT])
2932 return -ENOSPC;
2933
2934 /* sufficient free space available. now search down the tree
2935 * starting at the next level for the leftmost leaf that
2936 * describes sufficient free space.
2937 */
2938 for (k = le32_to_cpu(tp->dmt_height), ti = 1;
2939 k > 0; k--, ti = ((ti + n) << 2) + 1) {
2940 /* search the four nodes at this level, starting from
2941 * the left.
2942 */
2943 for (x = ti, n = 0; n < 4; n++) {
2944 /* sufficient free space found. move to the next
2945 * level (or quit if this is the last level).
2946 */
2947 if (l2nb <= tp->dmt_stree[x + n])
2948 break;
2949 }
2950
2951 /* better have found something since the higher
2952 * levels of the tree said it was here.
2953 */
2954 assert(n < 4);
2955 }
2956
2957 /* set the return to the leftmost leaf describing sufficient
2958 * free space.
2959 */
2960 *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx);
2961
2962 return (0);
2963 }
2964
2965
2966 /*
2967 * NAME: dbFindBits()
2968 *
2969 * FUNCTION: find a specified number of binary buddy free bits within a
2970 * dmap bitmap word value.
2971 *
2972 * this routine searches the bitmap value for (1 << l2nb) free
2973 * bits at (1 << l2nb) alignments within the value.
2974 *
2975 * PARAMETERS:
2976 * word - dmap bitmap word value.
2977 * l2nb - number of free bits specified as a log2 number.
2978 *
2979 * RETURN VALUES:
2980 * starting bit number of free bits.
2981 */
dbFindBits(u32 word,int l2nb)2982 static int dbFindBits(u32 word, int l2nb)
2983 {
2984 int bitno, nb;
2985 u32 mask;
2986
2987 /* get the number of bits.
2988 */
2989 nb = 1 << l2nb;
2990 assert(nb <= DBWORD);
2991
2992 /* complement the word so we can use a mask (i.e. 0s represent
2993 * free bits) and compute the mask.
2994 */
2995 word = ~word;
2996 mask = ONES << (DBWORD - nb);
2997
2998 /* scan the word for nb free bits at nb alignments.
2999 */
3000 for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) {
3001 if ((mask & word) == mask)
3002 break;
3003 }
3004
3005 ASSERT(bitno < 32);
3006
3007 /* return the bit number.
3008 */
3009 return (bitno);
3010 }
3011
3012
3013 /*
3014 * NAME: dbMaxBud(u8 *cp)
3015 *
3016 * FUNCTION: determine the largest binary buddy string of free
3017 * bits within 32-bits of the map.
3018 *
3019 * PARAMETERS:
3020 * cp - pointer to the 32-bit value.
3021 *
3022 * RETURN VALUES:
3023 * largest binary buddy of free bits within a dmap word.
3024 */
dbMaxBud(u8 * cp)3025 static int dbMaxBud(u8 * cp)
3026 {
3027 signed char tmp1, tmp2;
3028
3029 /* check if the wmap word is all free. if so, the
3030 * free buddy size is BUDMIN.
3031 */
3032 if (*((uint *) cp) == 0)
3033 return (BUDMIN);
3034
3035 /* check if the wmap word is half free. if so, the
3036 * free buddy size is BUDMIN-1.
3037 */
3038 if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0)
3039 return (BUDMIN - 1);
3040
3041 /* not all free or half free. determine the free buddy
3042 * size thru table lookup using quarters of the wmap word.
3043 */
3044 tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
3045 tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
3046 return (max(tmp1, tmp2));
3047 }
3048
3049
3050 /*
3051 * NAME: cnttz(uint word)
3052 *
3053 * FUNCTION: determine the number of trailing zeros within a 32-bit
3054 * value.
3055 *
3056 * PARAMETERS:
3057 * value - 32-bit value to be examined.
3058 *
3059 * RETURN VALUES:
3060 * count of trailing zeros
3061 */
cnttz(u32 word)3062 static int cnttz(u32 word)
3063 {
3064 int n;
3065
3066 for (n = 0; n < 32; n++, word >>= 1) {
3067 if (word & 0x01)
3068 break;
3069 }
3070
3071 return (n);
3072 }
3073
3074
3075 /*
3076 * NAME: cntlz(u32 value)
3077 *
3078 * FUNCTION: determine the number of leading zeros within a 32-bit
3079 * value.
3080 *
3081 * PARAMETERS:
3082 * value - 32-bit value to be examined.
3083 *
3084 * RETURN VALUES:
3085 * count of leading zeros
3086 */
cntlz(u32 value)3087 static int cntlz(u32 value)
3088 {
3089 int n;
3090
3091 for (n = 0; n < 32; n++, value <<= 1) {
3092 if (value & HIGHORDER)
3093 break;
3094 }
3095 return (n);
3096 }
3097
3098
3099 /*
3100 * NAME: blkstol2(s64 nb)
3101 *
3102 * FUNCTION: convert a block count to its log2 value. if the block
3103 * count is not a l2 multiple, it is rounded up to the next
3104 * larger l2 multiple.
3105 *
3106 * PARAMETERS:
3107 * nb - number of blocks
3108 *
3109 * RETURN VALUES:
3110 * log2 number of blocks
3111 */
blkstol2(s64 nb)3112 int blkstol2(s64 nb)
3113 {
3114 int l2nb;
3115 s64 mask; /* meant to be signed */
3116
3117 mask = (s64) 1 << (64 - 1);
3118
3119 /* count the leading bits.
3120 */
3121 for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
3122 /* leading bit found.
3123 */
3124 if (nb & mask) {
3125 /* determine the l2 value.
3126 */
3127 l2nb = (64 - 1) - l2nb;
3128
3129 /* check if we need to round up.
3130 */
3131 if (~mask & nb)
3132 l2nb++;
3133
3134 return (l2nb);
3135 }
3136 }
3137 assert(0);
3138 return 0; /* fix compiler warning */
3139 }
3140
3141
3142 /*
3143 * NAME: dbAllocBottomUp()
3144 *
3145 * FUNCTION: alloc the specified block range from the working block
3146 * allocation map.
3147 *
3148 * the blocks will be alloc from the working map one dmap
3149 * at a time.
3150 *
3151 * PARAMETERS:
3152 * ip - pointer to in-core inode;
3153 * blkno - starting block number to be freed.
3154 * nblocks - number of blocks to be freed.
3155 *
3156 * RETURN VALUES:
3157 * 0 - success
3158 * -EIO - i/o error
3159 */
dbAllocBottomUp(struct inode * ip,s64 blkno,s64 nblocks)3160 int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3161 {
3162 struct metapage *mp;
3163 struct dmap *dp;
3164 int nb, rc;
3165 s64 lblkno, rem;
3166 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
3167 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
3168
3169 IREAD_LOCK(ipbmap);
3170
3171 /* block to be allocated better be within the mapsize. */
3172 ASSERT(nblocks <= bmp->db_mapsize - blkno);
3173
3174 /*
3175 * allocate the blocks a dmap at a time.
3176 */
3177 mp = NULL;
3178 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
3179 /* release previous dmap if any */
3180 if (mp) {
3181 write_metapage(mp);
3182 }
3183
3184 /* get the buffer for the current dmap. */
3185 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
3186 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
3187 if (mp == NULL) {
3188 IREAD_UNLOCK(ipbmap);
3189 return -EIO;
3190 }
3191 dp = (struct dmap *) mp->data;
3192
3193 /* determine the number of blocks to be allocated from
3194 * this dmap.
3195 */
3196 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
3197
3198 DBFREECK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
3199
3200 /* allocate the blocks. */
3201 if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
3202 release_metapage(mp);
3203 IREAD_UNLOCK(ipbmap);
3204 return (rc);
3205 }
3206
3207 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
3208 }
3209
3210 /* write the last buffer. */
3211 write_metapage(mp);
3212
3213 IREAD_UNLOCK(ipbmap);
3214
3215 return (0);
3216 }
3217
3218
dbAllocDmapBU(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)3219 static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3220 int nblocks)
3221 {
3222 int rc;
3223 int dbitno, word, rembits, nb, nwords, wbitno, agno;
3224 s8 oldroot, *leaf;
3225 struct dmaptree *tp = (struct dmaptree *) & dp->tree;
3226
3227 /* save the current value of the root (i.e. maximum free string)
3228 * of the dmap tree.
3229 */
3230 oldroot = tp->stree[ROOT];
3231
3232 /* pick up a pointer to the leaves of the dmap tree */
3233 leaf = tp->stree + LEAFIND;
3234
3235 /* determine the bit number and word within the dmap of the
3236 * starting block.
3237 */
3238 dbitno = blkno & (BPERDMAP - 1);
3239 word = dbitno >> L2DBWORD;
3240
3241 /* block range better be within the dmap */
3242 assert(dbitno + nblocks <= BPERDMAP);
3243
3244 /* allocate the bits of the dmap's words corresponding to the block
3245 * range. not all bits of the first and last words may be contained
3246 * within the block range. if this is the case, we'll work against
3247 * those words (i.e. partial first and/or last) on an individual basis
3248 * (a single pass), allocating the bits of interest by hand and
3249 * updating the leaf corresponding to the dmap word. a single pass
3250 * will be used for all dmap words fully contained within the
3251 * specified range. within this pass, the bits of all fully contained
3252 * dmap words will be marked as free in a single shot and the leaves
3253 * will be updated. a single leaf may describe the free space of
3254 * multiple dmap words, so we may update only a subset of the actual
3255 * leaves corresponding to the dmap words of the block range.
3256 */
3257 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
3258 /* determine the bit number within the word and
3259 * the number of bits within the word.
3260 */
3261 wbitno = dbitno & (DBWORD - 1);
3262 nb = min(rembits, DBWORD - wbitno);
3263
3264 /* check if only part of a word is to be allocated.
3265 */
3266 if (nb < DBWORD) {
3267 /* allocate (set to 1) the appropriate bits within
3268 * this dmap word.
3269 */
3270 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
3271 >> wbitno);
3272
3273 word++;
3274 } else {
3275 /* one or more dmap words are fully contained
3276 * within the block range. determine how many
3277 * words and allocate (set to 1) the bits of these
3278 * words.
3279 */
3280 nwords = rembits >> L2DBWORD;
3281 memset(&dp->wmap[word], (int) ONES, nwords * 4);
3282
3283 /* determine how many bits */
3284 nb = nwords << L2DBWORD;
3285 word += nwords;
3286 }
3287 }
3288
3289 /* update the free count for this dmap */
3290 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
3291
3292 /* reconstruct summary tree */
3293 dbInitDmapTree(dp);
3294
3295 BMAP_LOCK(bmp);
3296
3297 /* if this allocation group is completely free,
3298 * update the highest active allocation group number
3299 * if this allocation group is the new max.
3300 */
3301 agno = blkno >> bmp->db_agl2size;
3302 if (agno > bmp->db_maxag)
3303 bmp->db_maxag = agno;
3304
3305 /* update the free count for the allocation group and map */
3306 bmp->db_agfree[agno] -= nblocks;
3307 bmp->db_nfree -= nblocks;
3308
3309 BMAP_UNLOCK(bmp);
3310
3311 /* if the root has not changed, done. */
3312 if (tp->stree[ROOT] == oldroot)
3313 return (0);
3314
3315 /* root changed. bubble the change up to the dmap control pages.
3316 * if the adjustment of the upper level control pages fails,
3317 * backout the bit allocation (thus making everything consistent).
3318 */
3319 if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0)))
3320 dbFreeBits(bmp, dp, blkno, nblocks);
3321
3322 return (rc);
3323 }
3324
3325
3326 /*
3327 * NAME: dbExtendFS()
3328 *
3329 * FUNCTION: extend bmap from blkno for nblocks;
3330 * dbExtendFS() updates bmap ready for dbAllocBottomUp();
3331 *
3332 * L2
3333 * |
3334 * L1---------------------------------L1
3335 * | |
3336 * L0---------L0---------L0 L0---------L0---------L0
3337 * | | | | | |
3338 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
3339 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
3340 *
3341 * <---old---><----------------------------extend----------------------->
3342 */
dbExtendFS(struct inode * ipbmap,s64 blkno,s64 nblocks)3343 int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3344 {
3345 struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb);
3346 int nbperpage = sbi->nbperpage;
3347 int i, i0 = TRUE, j, j0 = TRUE, k, n;
3348 s64 newsize;
3349 s64 p;
3350 struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL;
3351 struct dmapctl *l2dcp, *l1dcp, *l0dcp;
3352 struct dmap *dp;
3353 s8 *l0leaf, *l1leaf, *l2leaf;
3354 struct bmap *bmp = sbi->bmap;
3355 int agno, l2agsize, oldl2agsize;
3356 s64 ag_rem;
3357
3358 newsize = blkno + nblocks;
3359
3360 jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld",
3361 (long long) blkno, (long long) nblocks, (long long) newsize);
3362
3363 /*
3364 * initialize bmap control page.
3365 *
3366 * all the data in bmap control page should exclude
3367 * the mkfs hidden dmap page.
3368 */
3369
3370 /* update mapsize */
3371 bmp->db_mapsize = newsize;
3372 bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize);
3373
3374 /* compute new AG size */
3375 l2agsize = dbGetL2AGSize(newsize);
3376 oldl2agsize = bmp->db_agl2size;
3377
3378 bmp->db_agl2size = l2agsize;
3379 bmp->db_agsize = 1 << l2agsize;
3380
3381 /* compute new number of AG */
3382 agno = bmp->db_numag;
3383 bmp->db_numag = newsize >> l2agsize;
3384 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
3385
3386 /*
3387 * reconfigure db_agfree[]
3388 * from old AG configuration to new AG configuration;
3389 *
3390 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
3391 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
3392 * note: new AG size = old AG size * (2**x).
3393 */
3394 if (l2agsize == oldl2agsize)
3395 goto extend;
3396 k = 1 << (l2agsize - oldl2agsize);
3397 ag_rem = bmp->db_agfree[0]; /* save agfree[0] */
3398 for (i = 0, n = 0; i < agno; n++) {
3399 bmp->db_agfree[n] = 0; /* init collection point */
3400
3401 /* coalesce cotiguous k AGs; */
3402 for (j = 0; j < k && i < agno; j++, i++) {
3403 /* merge AGi to AGn */
3404 bmp->db_agfree[n] += bmp->db_agfree[i];
3405 }
3406 }
3407 bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */
3408
3409 for (; n < MAXAG; n++)
3410 bmp->db_agfree[n] = 0;
3411
3412 /*
3413 * update highest active ag number
3414 */
3415
3416 bmp->db_maxag = bmp->db_maxag / k;
3417
3418 /*
3419 * extend bmap
3420 *
3421 * update bit maps and corresponding level control pages;
3422 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
3423 */
3424 extend:
3425 /* get L2 page */
3426 p = BMAPBLKNO + nbperpage; /* L2 page */
3427 l2mp = read_metapage(ipbmap, p, PSIZE, 0);
3428 if (!l2mp) {
3429 jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read");
3430 return -EIO;
3431 }
3432 l2dcp = (struct dmapctl *) l2mp->data;
3433
3434 /* compute start L1 */
3435 k = blkno >> L2MAXL1SIZE;
3436 l2leaf = l2dcp->stree + CTLLEAFIND + k;
3437 p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */
3438
3439 /*
3440 * extend each L1 in L2
3441 */
3442 for (; k < LPERCTL; k++, p += nbperpage) {
3443 /* get L1 page */
3444 if (j0) {
3445 /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */
3446 l1mp = read_metapage(ipbmap, p, PSIZE, 0);
3447 if (l1mp == NULL)
3448 goto errout;
3449 l1dcp = (struct dmapctl *) l1mp->data;
3450
3451 /* compute start L0 */
3452 j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
3453 l1leaf = l1dcp->stree + CTLLEAFIND + j;
3454 p = BLKTOL0(blkno, sbi->l2nbperpage);
3455 j0 = FALSE;
3456 } else {
3457 /* assign/init L1 page */
3458 l1mp = get_metapage(ipbmap, p, PSIZE, 0);
3459 if (l1mp == NULL)
3460 goto errout;
3461
3462 l1dcp = (struct dmapctl *) l1mp->data;
3463
3464 /* compute start L0 */
3465 j = 0;
3466 l1leaf = l1dcp->stree + CTLLEAFIND;
3467 p += nbperpage; /* 1st L0 of L1.k */
3468 }
3469
3470 /*
3471 * extend each L0 in L1
3472 */
3473 for (; j < LPERCTL; j++) {
3474 /* get L0 page */
3475 if (i0) {
3476 /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */
3477
3478 l0mp = read_metapage(ipbmap, p, PSIZE, 0);
3479 if (l0mp == NULL)
3480 goto errout;
3481 l0dcp = (struct dmapctl *) l0mp->data;
3482
3483 /* compute start dmap */
3484 i = (blkno & (MAXL0SIZE - 1)) >>
3485 L2BPERDMAP;
3486 l0leaf = l0dcp->stree + CTLLEAFIND + i;
3487 p = BLKTODMAP(blkno,
3488 sbi->l2nbperpage);
3489 i0 = FALSE;
3490 } else {
3491 /* assign/init L0 page */
3492 l0mp = get_metapage(ipbmap, p, PSIZE, 0);
3493 if (l0mp == NULL)
3494 goto errout;
3495
3496 l0dcp = (struct dmapctl *) l0mp->data;
3497
3498 /* compute start dmap */
3499 i = 0;
3500 l0leaf = l0dcp->stree + CTLLEAFIND;
3501 p += nbperpage; /* 1st dmap of L0.j */
3502 }
3503
3504 /*
3505 * extend each dmap in L0
3506 */
3507 for (; i < LPERCTL; i++) {
3508 /*
3509 * reconstruct the dmap page, and
3510 * initialize corresponding parent L0 leaf
3511 */
3512 if ((n = blkno & (BPERDMAP - 1))) {
3513 /* read in dmap page: */
3514 mp = read_metapage(ipbmap, p,
3515 PSIZE, 0);
3516 if (mp == NULL)
3517 goto errout;
3518 n = min(nblocks, (s64)BPERDMAP - n);
3519 } else {
3520 /* assign/init dmap page */
3521 mp = read_metapage(ipbmap, p,
3522 PSIZE, 0);
3523 if (mp == NULL)
3524 goto errout;
3525
3526 n = min(nblocks, (s64)BPERDMAP);
3527 }
3528
3529 dp = (struct dmap *) mp->data;
3530 *l0leaf = dbInitDmap(dp, blkno, n);
3531
3532 bmp->db_nfree += n;
3533 agno = le64_to_cpu(dp->start) >> l2agsize;
3534 bmp->db_agfree[agno] += n;
3535
3536 write_metapage(mp);
3537
3538 l0leaf++;
3539 p += nbperpage;
3540
3541 blkno += n;
3542 nblocks -= n;
3543 if (nblocks == 0)
3544 break;
3545 } /* for each dmap in a L0 */
3546
3547 /*
3548 * build current L0 page from its leaves, and
3549 * initialize corresponding parent L1 leaf
3550 */
3551 *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
3552 write_metapage(l0mp);
3553 l0mp = NULL;
3554
3555 if (nblocks)
3556 l1leaf++; /* continue for next L0 */
3557 else {
3558 /* more than 1 L0 ? */
3559 if (j > 0)
3560 break; /* build L1 page */
3561 else {
3562 /* summarize in global bmap page */
3563 bmp->db_maxfreebud = *l1leaf;
3564 release_metapage(l1mp);
3565 release_metapage(l2mp);
3566 goto finalize;
3567 }
3568 }
3569 } /* for each L0 in a L1 */
3570
3571 /*
3572 * build current L1 page from its leaves, and
3573 * initialize corresponding parent L2 leaf
3574 */
3575 *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
3576 write_metapage(l1mp);
3577 l1mp = NULL;
3578
3579 if (nblocks)
3580 l2leaf++; /* continue for next L1 */
3581 else {
3582 /* more than 1 L1 ? */
3583 if (k > 0)
3584 break; /* build L2 page */
3585 else {
3586 /* summarize in global bmap page */
3587 bmp->db_maxfreebud = *l2leaf;
3588 release_metapage(l2mp);
3589 goto finalize;
3590 }
3591 }
3592 } /* for each L1 in a L2 */
3593
3594 jfs_error(ipbmap->i_sb,
3595 "dbExtendFS: function has not returned as expected");
3596 errout:
3597 if (l0mp)
3598 release_metapage(l0mp);
3599 if (l1mp)
3600 release_metapage(l1mp);
3601 release_metapage(l2mp);
3602 return -EIO;
3603
3604 /*
3605 * finalize bmap control page
3606 */
3607 finalize:
3608
3609 return 0;
3610 }
3611
3612
3613 /*
3614 * dbFinalizeBmap()
3615 */
dbFinalizeBmap(struct inode * ipbmap)3616 void dbFinalizeBmap(struct inode *ipbmap)
3617 {
3618 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
3619 int actags, inactags, l2nl;
3620 s64 ag_rem, actfree, inactfree, avgfree;
3621 int i, n;
3622
3623 /*
3624 * finalize bmap control page
3625 */
3626 //finalize:
3627 /*
3628 * compute db_agpref: preferred ag to allocate from
3629 * (the leftmost ag with average free space in it);
3630 */
3631 //agpref:
3632 /* get the number of active ags and inacitve ags */
3633 actags = bmp->db_maxag + 1;
3634 inactags = bmp->db_numag - actags;
3635 ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */
3636
3637 /* determine how many blocks are in the inactive allocation
3638 * groups. in doing this, we must account for the fact that
3639 * the rightmost group might be a partial group (i.e. file
3640 * system size is not a multiple of the group size).
3641 */
3642 inactfree = (inactags && ag_rem) ?
3643 ((inactags - 1) << bmp->db_agl2size) + ag_rem
3644 : inactags << bmp->db_agl2size;
3645
3646 /* determine how many free blocks are in the active
3647 * allocation groups plus the average number of free blocks
3648 * within the active ags.
3649 */
3650 actfree = bmp->db_nfree - inactfree;
3651 avgfree = (u32) actfree / (u32) actags;
3652
3653 /* if the preferred allocation group has not average free space.
3654 * re-establish the preferred group as the leftmost
3655 * group with average free space.
3656 */
3657 if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
3658 for (bmp->db_agpref = 0; bmp->db_agpref < actags;
3659 bmp->db_agpref++) {
3660 if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
3661 break;
3662 }
3663 if (bmp->db_agpref >= bmp->db_numag) {
3664 jfs_error(ipbmap->i_sb,
3665 "cannot find ag with average freespace");
3666 }
3667 }
3668
3669 /*
3670 * compute db_aglevel, db_agheigth, db_width, db_agstart:
3671 * an ag is covered in aglevel dmapctl summary tree,
3672 * at agheight level height (from leaf) with agwidth number of nodes
3673 * each, which starts at agstart index node of the smmary tree node
3674 * array;
3675 */
3676 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
3677 l2nl =
3678 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
3679 bmp->db_agheigth = l2nl >> 1;
3680 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
3681 for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
3682 i--) {
3683 bmp->db_agstart += n;
3684 n <<= 2;
3685 }
3686
3687 }
3688
3689
3690 /*
3691 * NAME: dbInitDmap()/ujfs_idmap_page()
3692 *
3693 * FUNCTION: initialize working/persistent bitmap of the dmap page
3694 * for the specified number of blocks:
3695 *
3696 * at entry, the bitmaps had been initialized as free (ZEROS);
3697 * The number of blocks will only account for the actually
3698 * existing blocks. Blocks which don't actually exist in
3699 * the aggregate will be marked as allocated (ONES);
3700 *
3701 * PARAMETERS:
3702 * dp - pointer to page of map
3703 * nblocks - number of blocks this page
3704 *
3705 * RETURNS: NONE
3706 */
dbInitDmap(struct dmap * dp,s64 Blkno,int nblocks)3707 static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3708 {
3709 int blkno, w, b, r, nw, nb, i;
3710
3711 /* starting block number within the dmap */
3712 blkno = Blkno & (BPERDMAP - 1);
3713
3714 if (blkno == 0) {
3715 dp->nblocks = dp->nfree = cpu_to_le32(nblocks);
3716 dp->start = cpu_to_le64(Blkno);
3717
3718 if (nblocks == BPERDMAP) {
3719 memset(&dp->wmap[0], 0, LPERDMAP * 4);
3720 memset(&dp->pmap[0], 0, LPERDMAP * 4);
3721 goto initTree;
3722 }
3723 } else {
3724 dp->nblocks =
3725 cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks);
3726 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
3727 }
3728
3729 /* word number containing start block number */
3730 w = blkno >> L2DBWORD;
3731
3732 /*
3733 * free the bits corresponding to the block range (ZEROS):
3734 * note: not all bits of the first and last words may be contained
3735 * within the block range.
3736 */
3737 for (r = nblocks; r > 0; r -= nb, blkno += nb) {
3738 /* number of bits preceding range to be freed in the word */
3739 b = blkno & (DBWORD - 1);
3740 /* number of bits to free in the word */
3741 nb = min(r, DBWORD - b);
3742
3743 /* is partial word to be freed ? */
3744 if (nb < DBWORD) {
3745 /* free (set to 0) from the bitmap word */
3746 dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3747 >> b));
3748 dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3749 >> b));
3750
3751 /* skip the word freed */
3752 w++;
3753 } else {
3754 /* free (set to 0) contiguous bitmap words */
3755 nw = r >> L2DBWORD;
3756 memset(&dp->wmap[w], 0, nw * 4);
3757 memset(&dp->pmap[w], 0, nw * 4);
3758
3759 /* skip the words freed */
3760 nb = nw << L2DBWORD;
3761 w += nw;
3762 }
3763 }
3764
3765 /*
3766 * mark bits following the range to be freed (non-existing
3767 * blocks) as allocated (ONES)
3768 */
3769
3770 if (blkno == BPERDMAP)
3771 goto initTree;
3772
3773 /* the first word beyond the end of existing blocks */
3774 w = blkno >> L2DBWORD;
3775
3776 /* does nblocks fall on a 32-bit boundary ? */
3777 b = blkno & (DBWORD - 1);
3778 if (b) {
3779 /* mark a partial word allocated */
3780 dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b);
3781 w++;
3782 }
3783
3784 /* set the rest of the words in the page to allocated (ONES) */
3785 for (i = w; i < LPERDMAP; i++)
3786 dp->pmap[i] = dp->wmap[i] = ONES;
3787
3788 /*
3789 * init tree
3790 */
3791 initTree:
3792 return (dbInitDmapTree(dp));
3793 }
3794
3795
3796 /*
3797 * NAME: dbInitDmapTree()/ujfs_complete_dmap()
3798 *
3799 * FUNCTION: initialize summary tree of the specified dmap:
3800 *
3801 * at entry, bitmap of the dmap has been initialized;
3802 *
3803 * PARAMETERS:
3804 * dp - dmap to complete
3805 * blkno - starting block number for this dmap
3806 * treemax - will be filled in with max free for this dmap
3807 *
3808 * RETURNS: max free string at the root of the tree
3809 */
dbInitDmapTree(struct dmap * dp)3810 static int dbInitDmapTree(struct dmap * dp)
3811 {
3812 struct dmaptree *tp;
3813 s8 *cp;
3814 int i;
3815
3816 /* init fixed info of tree */
3817 tp = &dp->tree;
3818 tp->nleafs = cpu_to_le32(LPERDMAP);
3819 tp->l2nleafs = cpu_to_le32(L2LPERDMAP);
3820 tp->leafidx = cpu_to_le32(LEAFIND);
3821 tp->height = cpu_to_le32(4);
3822 tp->budmin = BUDMIN;
3823
3824 /* init each leaf from corresponding wmap word:
3825 * note: leaf is set to NOFREE(-1) if all blocks of corresponding
3826 * bitmap word are allocated.
3827 */
3828 cp = tp->stree + le32_to_cpu(tp->leafidx);
3829 for (i = 0; i < LPERDMAP; i++)
3830 *cp++ = dbMaxBud((u8 *) & dp->wmap[i]);
3831
3832 /* build the dmap's binary buddy summary tree */
3833 return (dbInitTree(tp));
3834 }
3835
3836
3837 /*
3838 * NAME: dbInitTree()/ujfs_adjtree()
3839 *
3840 * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl.
3841 *
3842 * at entry, the leaves of the tree has been initialized
3843 * from corresponding bitmap word or root of summary tree
3844 * of the child control page;
3845 * configure binary buddy system at the leaf level, then
3846 * bubble up the values of the leaf nodes up the tree.
3847 *
3848 * PARAMETERS:
3849 * cp - Pointer to the root of the tree
3850 * l2leaves- Number of leaf nodes as a power of 2
3851 * l2min - Number of blocks that can be covered by a leaf
3852 * as a power of 2
3853 *
3854 * RETURNS: max free string at the root of the tree
3855 */
dbInitTree(struct dmaptree * dtp)3856 static int dbInitTree(struct dmaptree * dtp)
3857 {
3858 int l2max, l2free, bsize, nextb, i;
3859 int child, parent, nparent;
3860 s8 *tp, *cp, *cp1;
3861
3862 tp = dtp->stree;
3863
3864 /* Determine the maximum free string possible for the leaves */
3865 l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;
3866
3867 /*
3868 * configure the leaf levevl into binary buddy system
3869 *
3870 * Try to combine buddies starting with a buddy size of 1
3871 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
3872 * can be combined if both buddies have a maximum free of l2min;
3873 * the combination will result in the left-most buddy leaf having
3874 * a maximum free of l2min+1.
3875 * After processing all buddies for a given size, process buddies
3876 * at the next higher buddy size (i.e. current size * 2) and
3877 * the next maximum free (current free + 1).
3878 * This continues until the maximum possible buddy combination
3879 * yields maximum free.
3880 */
3881 for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
3882 l2free++, bsize = nextb) {
3883 /* get next buddy size == current buddy pair size */
3884 nextb = bsize << 1;
3885
3886 /* scan each adjacent buddy pair at current buddy size */
3887 for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
3888 i < le32_to_cpu(dtp->nleafs);
3889 i += nextb, cp += nextb) {
3890 /* coalesce if both adjacent buddies are max free */
3891 if (*cp == l2free && *(cp + bsize) == l2free) {
3892 *cp = l2free + 1; /* left take right */
3893 *(cp + bsize) = -1; /* right give left */
3894 }
3895 }
3896 }
3897
3898 /*
3899 * bubble summary information of leaves up the tree.
3900 *
3901 * Starting at the leaf node level, the four nodes described by
3902 * the higher level parent node are compared for a maximum free and
3903 * this maximum becomes the value of the parent node.
3904 * when all lower level nodes are processed in this fashion then
3905 * move up to the next level (parent becomes a lower level node) and
3906 * continue the process for that level.
3907 */
3908 for (child = le32_to_cpu(dtp->leafidx),
3909 nparent = le32_to_cpu(dtp->nleafs) >> 2;
3910 nparent > 0; nparent >>= 2, child = parent) {
3911 /* get index of 1st node of parent level */
3912 parent = (child - 1) >> 2;
3913
3914 /* set the value of the parent node as the maximum
3915 * of the four nodes of the current level.
3916 */
3917 for (i = 0, cp = tp + child, cp1 = tp + parent;
3918 i < nparent; i++, cp += 4, cp1++)
3919 *cp1 = TREEMAX(cp);
3920 }
3921
3922 return (*tp);
3923 }
3924
3925
3926 /*
3927 * dbInitDmapCtl()
3928 *
3929 * function: initialize dmapctl page
3930 */
dbInitDmapCtl(struct dmapctl * dcp,int level,int i)3931 static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
3932 { /* start leaf index not covered by range */
3933 s8 *cp;
3934
3935 dcp->nleafs = cpu_to_le32(LPERCTL);
3936 dcp->l2nleafs = cpu_to_le32(L2LPERCTL);
3937 dcp->leafidx = cpu_to_le32(CTLLEAFIND);
3938 dcp->height = cpu_to_le32(5);
3939 dcp->budmin = L2BPERDMAP + L2LPERCTL * level;
3940
3941 /*
3942 * initialize the leaves of current level that were not covered
3943 * by the specified input block range (i.e. the leaves have no
3944 * low level dmapctl or dmap).
3945 */
3946 cp = &dcp->stree[CTLLEAFIND + i];
3947 for (; i < LPERCTL; i++)
3948 *cp++ = NOFREE;
3949
3950 /* build the dmap's binary buddy summary tree */
3951 return (dbInitTree((struct dmaptree *) dcp));
3952 }
3953
3954
3955 /*
3956 * NAME: dbGetL2AGSize()/ujfs_getagl2size()
3957 *
3958 * FUNCTION: Determine log2(allocation group size) from aggregate size
3959 *
3960 * PARAMETERS:
3961 * nblocks - Number of blocks in aggregate
3962 *
3963 * RETURNS: log2(allocation group size) in aggregate blocks
3964 */
dbGetL2AGSize(s64 nblocks)3965 static int dbGetL2AGSize(s64 nblocks)
3966 {
3967 s64 sz;
3968 s64 m;
3969 int l2sz;
3970
3971 if (nblocks < BPERDMAP * MAXAG)
3972 return (L2BPERDMAP);
3973
3974 /* round up aggregate size to power of 2 */
3975 m = ((u64) 1 << (64 - 1));
3976 for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) {
3977 if (m & nblocks)
3978 break;
3979 }
3980
3981 sz = (s64) 1 << l2sz;
3982 if (sz < nblocks)
3983 l2sz += 1;
3984
3985 /* agsize = roundupSize/max_number_of_ag */
3986 return (l2sz - L2MAXAG);
3987 }
3988
3989
3990 /*
3991 * NAME: dbMapFileSizeToMapSize()
3992 *
3993 * FUNCTION: compute number of blocks the block allocation map file
3994 * can cover from the map file size;
3995 *
3996 * RETURNS: Number of blocks which can be covered by this block map file;
3997 */
3998
3999 /*
4000 * maximum number of map pages at each level including control pages
4001 */
4002 #define MAXL0PAGES (1 + LPERCTL)
4003 #define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES)
4004 #define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES)
4005
4006 /*
4007 * convert number of map pages to the zero origin top dmapctl level
4008 */
4009 #define BMAPPGTOLEV(npages) \
4010 (((npages) <= 3 + MAXL0PAGES) ? 0 \
4011 : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
4012
dbMapFileSizeToMapSize(struct inode * ipbmap)4013 s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
4014 {
4015 struct super_block *sb = ipbmap->i_sb;
4016 s64 nblocks;
4017 s64 npages, ndmaps;
4018 int level, i;
4019 int complete, factor;
4020
4021 nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize;
4022 npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
4023 level = BMAPPGTOLEV(npages);
4024
4025 /* At each level, accumulate the number of dmap pages covered by
4026 * the number of full child levels below it;
4027 * repeat for the last incomplete child level.
4028 */
4029 ndmaps = 0;
4030 npages--; /* skip the first global control page */
4031 /* skip higher level control pages above top level covered by map */
4032 npages -= (2 - level);
4033 npages--; /* skip top level's control page */
4034 for (i = level; i >= 0; i--) {
4035 factor =
4036 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
4037 complete = (u32) npages / factor;
4038 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL
4039 : ((i == 1) ? LPERCTL : 1));
4040
4041 /* pages in last/incomplete child */
4042 npages = (u32) npages % factor;
4043 /* skip incomplete child's level control page */
4044 npages--;
4045 }
4046
4047 /* convert the number of dmaps into the number of blocks
4048 * which can be covered by the dmaps;
4049 */
4050 nblocks = ndmaps << L2BPERDMAP;
4051
4052 return (nblocks);
4053 }
4054
4055
4056 #ifdef _JFS_DEBUG_DMAP
4057 /*
4058 * DBinitmap()
4059 */
DBinitmap(s64 size,struct inode * ipbmap,u32 ** results)4060 static void DBinitmap(s64 size, struct inode *ipbmap, u32 ** results)
4061 {
4062 int npages;
4063 u32 *dbmap, *d;
4064 int n;
4065 s64 lblkno, cur_block;
4066 struct dmap *dp;
4067 struct metapage *mp;
4068
4069 npages = size / 32768;
4070 npages += (size % 32768) ? 1 : 0;
4071
4072 dbmap = (u32 *) xmalloc(npages * 4096, L2PSIZE, kernel_heap);
4073 if (dbmap == NULL)
4074 BUG(); /* Not robust since this is only unused debug code */
4075
4076 for (n = 0, d = dbmap; n < npages; n++, d += 1024)
4077 bzero(d, 4096);
4078
4079 /* Need to initialize from disk map pages
4080 */
4081 for (d = dbmap, cur_block = 0; cur_block < size;
4082 cur_block += BPERDMAP, d += LPERDMAP) {
4083 lblkno = BLKTODMAP(cur_block,
4084 JFS_SBI(ipbmap->i_sb)->bmap->
4085 db_l2nbperpage);
4086 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
4087 if (mp == NULL) {
4088 jfs_error(ipbmap->i_sb,
4089 "DBinitmap: could not read disk map page");
4090 continue;
4091 }
4092 dp = (struct dmap *) mp->data;
4093
4094 for (n = 0; n < LPERDMAP; n++)
4095 d[n] = le32_to_cpu(dp->wmap[n]);
4096
4097 release_metapage(mp);
4098 }
4099
4100 *results = dbmap;
4101 }
4102
4103
4104 /*
4105 * DBAlloc()
4106 */
DBAlloc(uint * dbmap,s64 mapsize,s64 blkno,s64 nblocks)4107 void DBAlloc(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4108 {
4109 int word, nb, bitno;
4110 u32 mask;
4111
4112 assert(blkno > 0 && blkno < mapsize);
4113 assert(nblocks > 0 && nblocks <= mapsize);
4114
4115 assert(blkno + nblocks <= mapsize);
4116
4117 dbmap += (blkno / 32);
4118 while (nblocks > 0) {
4119 bitno = blkno & (32 - 1);
4120 nb = min(nblocks, 32 - bitno);
4121
4122 mask = (0xffffffff << (32 - nb) >> bitno);
4123 assert((mask & *dbmap) == 0);
4124 *dbmap |= mask;
4125
4126 dbmap++;
4127 blkno += nb;
4128 nblocks -= nb;
4129 }
4130 }
4131
4132
4133 /*
4134 * DBFree()
4135 */
DBFree(uint * dbmap,s64 mapsize,s64 blkno,s64 nblocks)4136 static void DBFree(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4137 {
4138 int word, nb, bitno;
4139 u32 mask;
4140
4141 assert(blkno > 0 && blkno < mapsize);
4142 assert(nblocks > 0 && nblocks <= mapsize);
4143
4144 assert(blkno + nblocks <= mapsize);
4145
4146 dbmap += (blkno / 32);
4147 while (nblocks > 0) {
4148 bitno = blkno & (32 - 1);
4149 nb = min(nblocks, 32 - bitno);
4150
4151 mask = (0xffffffff << (32 - nb) >> bitno);
4152 assert((mask & *dbmap) == mask);
4153 *dbmap &= ~mask;
4154
4155 dbmap++;
4156 blkno += nb;
4157 nblocks -= nb;
4158 }
4159 }
4160
4161
4162 /*
4163 * DBAllocCK()
4164 */
DBAllocCK(uint * dbmap,s64 mapsize,s64 blkno,s64 nblocks)4165 static void DBAllocCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4166 {
4167 int word, nb, bitno;
4168 u32 mask;
4169
4170 assert(blkno > 0 && blkno < mapsize);
4171 assert(nblocks > 0 && nblocks <= mapsize);
4172
4173 assert(blkno + nblocks <= mapsize);
4174
4175 dbmap += (blkno / 32);
4176 while (nblocks > 0) {
4177 bitno = blkno & (32 - 1);
4178 nb = min(nblocks, 32 - bitno);
4179
4180 mask = (0xffffffff << (32 - nb) >> bitno);
4181 assert((mask & *dbmap) == mask);
4182
4183 dbmap++;
4184 blkno += nb;
4185 nblocks -= nb;
4186 }
4187 }
4188
4189
4190 /*
4191 * DBFreeCK()
4192 */
DBFreeCK(uint * dbmap,s64 mapsize,s64 blkno,s64 nblocks)4193 static void DBFreeCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4194 {
4195 int word, nb, bitno;
4196 u32 mask;
4197
4198 assert(blkno > 0 && blkno < mapsize);
4199 assert(nblocks > 0 && nblocks <= mapsize);
4200
4201 assert(blkno + nblocks <= mapsize);
4202
4203 dbmap += (blkno / 32);
4204 while (nblocks > 0) {
4205 bitno = blkno & (32 - 1);
4206 nb = min(nblocks, 32 - bitno);
4207
4208 mask = (0xffffffff << (32 - nb) >> bitno);
4209 assert((mask & *dbmap) == 0);
4210
4211 dbmap++;
4212 blkno += nb;
4213 nblocks -= nb;
4214 }
4215 }
4216
4217
4218 /*
4219 * dbPrtMap()
4220 */
dbPrtMap(struct bmap * bmp)4221 static void dbPrtMap(struct bmap * bmp)
4222 {
4223 printk(" mapsize: %d%d\n", bmp->db_mapsize);
4224 printk(" nfree: %d%d\n", bmp->db_nfree);
4225 printk(" numag: %d\n", bmp->db_numag);
4226 printk(" agsize: %d%d\n", bmp->db_agsize);
4227 printk(" agl2size: %d\n", bmp->db_agl2size);
4228 printk(" agwidth: %d\n", bmp->db_agwidth);
4229 printk(" agstart: %d\n", bmp->db_agstart);
4230 printk(" agheigth: %d\n", bmp->db_agheigth);
4231 printk(" aglevel: %d\n", bmp->db_aglevel);
4232 printk(" maxlevel: %d\n", bmp->db_maxlevel);
4233 printk(" maxag: %d\n", bmp->db_maxag);
4234 printk(" agpref: %d\n", bmp->db_agpref);
4235 printk(" l2nbppg: %d\n", bmp->db_l2nbperpage);
4236 }
4237
4238
4239 /*
4240 * dbPrtCtl()
4241 */
dbPrtCtl(struct dmapctl * dcp)4242 static void dbPrtCtl(struct dmapctl * dcp)
4243 {
4244 int i, j, n;
4245
4246 printk(" height: %08x\n", le32_to_cpu(dcp->height));
4247 printk(" leafidx: %08x\n", le32_to_cpu(dcp->leafidx));
4248 printk(" budmin: %08x\n", dcp->budmin);
4249 printk(" nleafs: %08x\n", le32_to_cpu(dcp->nleafs));
4250 printk(" l2nleafs: %08x\n", le32_to_cpu(dcp->l2nleafs));
4251
4252 printk("\n Tree:\n");
4253 for (i = 0; i < CTLLEAFIND; i += 8) {
4254 n = min(8, CTLLEAFIND - i);
4255
4256 for (j = 0; j < n; j++)
4257 printf(" [%03x]: %02x", i + j,
4258 (char) dcp->stree[i + j]);
4259 printf("\n");
4260 }
4261
4262 printk("\n Tree Leaves:\n");
4263 for (i = 0; i < LPERCTL; i += 8) {
4264 n = min(8, LPERCTL - i);
4265
4266 for (j = 0; j < n; j++)
4267 printf(" [%03x]: %02x",
4268 i + j,
4269 (char) dcp->stree[i + j + CTLLEAFIND]);
4270 printf("\n");
4271 }
4272 }
4273 #endif /* _JFS_DEBUG_DMAP */
4274