1 /*
2  *  linux/fs/ext3/namei.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/namei.c
12  *
13  *  Copyright (C) 1991, 1992  Linus Torvalds
14  *
15  *  Big-endian to little-endian byte-swapping/bitmaps by
16  *        David S. Miller (davem@caip.rutgers.edu), 1995
17  *  Directory entry file type support and forward compatibility hooks
18  *  	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
19  */
20 
21 #include <linux/fs.h>
22 #include <linux/jbd.h>
23 #include <linux/sched.h>
24 #include <linux/ext3_fs.h>
25 #include <linux/ext3_jbd.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/string.h>
29 #include <linux/locks.h>
30 #include <linux/quotaops.h>
31 
32 
/*
 * define how far ahead to read directories while searching them.
 *
 * NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) is the capacity of
 * the bh_use[] read-ahead array in ext3_find_entry(), i.e. the largest
 * number of directory blocks submitted for reading in one refill.
 * NAMEI_RA_INDEX(c,b) flattens a (chunk, block-within-chunk) pair into
 * an index in the range covered by NAMEI_RA_SIZE.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
40 
41 /*
42  * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
43  *
44  * `len <= EXT3_NAME_LEN' is guaranteed by caller.
45  * `de != NULL' is guaranteed by caller.
46  */
ext3_match(int len,const char * const name,struct ext3_dir_entry_2 * de)47 static inline int ext3_match (int len, const char * const name,
48 			      struct ext3_dir_entry_2 * de)
49 {
50 	if (len != de->name_len)
51 		return 0;
52 	if (!de->inode)
53 		return 0;
54 	return !memcmp(name, de->name, len);
55 }
56 
57 /*
58  * Returns 0 if not found, -1 on failure, and 1 on success
59  */
search_dirblock(struct buffer_head * bh,struct inode * dir,struct dentry * dentry,unsigned long offset,struct ext3_dir_entry_2 ** res_dir)60 static int inline search_dirblock(struct buffer_head * bh,
61 				  struct inode *dir,
62 				  struct dentry *dentry,
63 				  unsigned long offset,
64 				  struct ext3_dir_entry_2 ** res_dir)
65 {
66 	struct ext3_dir_entry_2 * de;
67 	char * dlimit;
68 	int de_len;
69 	const char *name = dentry->d_name.name;
70 	int namelen = dentry->d_name.len;
71 
72 	de = (struct ext3_dir_entry_2 *) bh->b_data;
73 	dlimit = bh->b_data + dir->i_sb->s_blocksize;
74 	while ((char *) de < dlimit) {
75 		/* this code is executed quadratically often */
76 		/* do minimal checking `by hand' */
77 
78 		if ((char *) de + namelen <= dlimit &&
79 		    ext3_match (namelen, name, de)) {
80 			/* found a match - just to be sure, do a full check */
81 			if (!ext3_check_dir_entry("ext3_find_entry",
82 						  dir, de, bh, offset))
83 				return -1;
84 			*res_dir = de;
85 			return 1;
86 		}
87 		/* prevent looping on a bad block */
88 		de_len = le16_to_cpu(de->rec_len);
89 		if (de_len <= 0)
90 			return -1;
91 		offset += de_len;
92 		de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
93 	}
94 	return 0;
95 }
96 
/*
 *	ext3_find_entry()
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 *
 * The directory searched is dentry->d_parent->d_inode.  The scan starts
 * at the block recorded in i_dir_start_lookup (updated on every
 * successful lookup) and wraps around to block 0.  Returns NULL, with
 * *res_dir set to NULL, when the name is not found or when
 * search_dirblock() reports a failure.
 */
static struct buffer_head * ext3_find_entry (struct dentry *dentry,
					struct ext3_dir_entry_2 ** res_dir)
{
	struct super_block * sb;
	struct buffer_head * bh_use[NAMEI_RA_SIZE];
	struct buffer_head * bh, *ret = NULL;
	unsigned long start, block, b;
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;		/* Count of blocks requested so far */
	int nblocks, i, err;
	struct inode *dir = dentry->d_parent->d_inode;

	*res_dir = NULL;
	sb = dir->i_sb;

	/* Directory length in blocks; a stale start hint falls back to 0 */
	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
	start = dir->u.ext3_i.i_dir_start_lookup;
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
				/* a NULL bh is stored as-is and skipped below */
				bh = ext3_getblk(NULL, dir, b++, 0, &err);
				bh_use[ra_max] = bh;
				if (bh)
					ll_rw_block(READ, 1, &bh);
			}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
			brelse(bh);
			goto next;
		}
		i = search_dirblock(bh, dir, dentry,
			    block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
		if (i == 1) {
			/* remember where we found it for the next lookup */
			dir->u.ext3_i.i_dir_start_lookup = block;
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			/* negative means search_dirblock() failed: give up */
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	/* ra_ptr is already past the buffer handled last, so only
	 * the not-yet-examined read-ahead buffers are released here */
	for (; ra_ptr < ra_max; ra_ptr++)
		brelse (bh_use[ra_ptr]);
	return ret;
}
198 
ext3_lookup(struct inode * dir,struct dentry * dentry)199 static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
200 {
201 	struct inode * inode;
202 	struct ext3_dir_entry_2 * de;
203 	struct buffer_head * bh;
204 
205 	if (dentry->d_name.len > EXT3_NAME_LEN)
206 		return ERR_PTR(-ENAMETOOLONG);
207 
208 	bh = ext3_find_entry(dentry, &de);
209 	inode = NULL;
210 	if (bh) {
211 		unsigned long ino = le32_to_cpu(de->inode);
212 		brelse (bh);
213 		inode = iget(dir->i_sb, ino);
214 
215 		if (!inode)
216 			return ERR_PTR(-EACCES);
217 	}
218 	d_add(dentry, inode);
219 	return NULL;
220 }
221 
222 #define S_SHIFT 12
223 static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
224 	[S_IFREG >> S_SHIFT]	EXT3_FT_REG_FILE,
225 	[S_IFDIR >> S_SHIFT]	EXT3_FT_DIR,
226 	[S_IFCHR >> S_SHIFT]	EXT3_FT_CHRDEV,
227 	[S_IFBLK >> S_SHIFT]	EXT3_FT_BLKDEV,
228 	[S_IFIFO >> S_SHIFT]	EXT3_FT_FIFO,
229 	[S_IFSOCK >> S_SHIFT]	EXT3_FT_SOCK,
230 	[S_IFLNK >> S_SHIFT]	EXT3_FT_SYMLINK,
231 };
232 
/*
 * Store the directory-entry file type derived from `mode' in `de'.
 * Nothing is written unless the superblock advertises
 * EXT3_FEATURE_INCOMPAT_FILETYPE.
 */
static inline void ext3_set_de_type(struct super_block *sb,
				struct ext3_dir_entry_2 *de,
				umode_t mode)
{
	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
		return;
	de->file_type = ext3_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
239 
240 /*
241  *	ext3_add_entry()
242  *
243  * adds a file entry to the specified directory, using the same
244  * semantics as ext3_find_entry(). It returns NULL if it failed.
245  *
246  * NOTE!! The inode part of 'de' is left at 0 - which means you
247  * may not sleep between calling this and putting something into
248  * the entry, as someone else might have used it while you slept.
249  */
250 
251 /*
252  * AKPM: the journalling code here looks wrong on the error paths
253  */
ext3_add_entry(handle_t * handle,struct dentry * dentry,struct inode * inode)254 static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
255 	struct inode *inode)
256 {
257 	struct inode *dir = dentry->d_parent->d_inode;
258 	const char *name = dentry->d_name.name;
259 	int namelen = dentry->d_name.len;
260 	unsigned long offset;
261 	unsigned short rec_len;
262 	struct buffer_head * bh;
263 	struct ext3_dir_entry_2 * de, * de1;
264 	struct super_block * sb;
265 	int	retval;
266 
267 	sb = dir->i_sb;
268 
269 	if (!namelen)
270 		return -EINVAL;
271 	bh = ext3_bread (handle, dir, 0, 0, &retval);
272 	if (!bh)
273 		return retval;
274 	rec_len = EXT3_DIR_REC_LEN(namelen);
275 	offset = 0;
276 	de = (struct ext3_dir_entry_2 *) bh->b_data;
277 	while (1) {
278 		if ((char *)de >= sb->s_blocksize + bh->b_data) {
279 			brelse (bh);
280 			bh = NULL;
281 			bh = ext3_bread (handle, dir,
282 				offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
283 			if (!bh)
284 				return retval;
285 			if (dir->i_size <= offset) {
286 				if (dir->i_size == 0) {
287 					brelse(bh);
288 					return -ENOENT;
289 				}
290 
291 				ext3_debug ("creating next block\n");
292 
293 				BUFFER_TRACE(bh, "get_write_access");
294 				ext3_journal_get_write_access(handle, bh);
295 				de = (struct ext3_dir_entry_2 *) bh->b_data;
296 				de->inode = 0;
297 				de->rec_len = le16_to_cpu(sb->s_blocksize);
298 				dir->u.ext3_i.i_disksize =
299 					dir->i_size = offset + sb->s_blocksize;
300 				dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
301 				ext3_mark_inode_dirty(handle, dir);
302 			} else {
303 
304 				ext3_debug ("skipping to next block\n");
305 
306 				de = (struct ext3_dir_entry_2 *) bh->b_data;
307 			}
308 		}
309 		if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
310 					   offset)) {
311 			brelse (bh);
312 			return -ENOENT;
313 		}
314 		if (ext3_match (namelen, name, de)) {
315 				brelse (bh);
316 				return -EEXIST;
317 		}
318 		if ((le32_to_cpu(de->inode) == 0 &&
319 				le16_to_cpu(de->rec_len) >= rec_len) ||
320 		    (le16_to_cpu(de->rec_len) >=
321 				EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
322 			BUFFER_TRACE(bh, "get_write_access");
323 			ext3_journal_get_write_access(handle, bh);
324 			/* By now the buffer is marked for journaling */
325 			offset += le16_to_cpu(de->rec_len);
326 			if (le32_to_cpu(de->inode)) {
327 				de1 = (struct ext3_dir_entry_2 *) ((char *) de +
328 					EXT3_DIR_REC_LEN(de->name_len));
329 				de1->rec_len =
330 					cpu_to_le16(le16_to_cpu(de->rec_len) -
331 					EXT3_DIR_REC_LEN(de->name_len));
332 				de->rec_len = cpu_to_le16(
333 						EXT3_DIR_REC_LEN(de->name_len));
334 				de = de1;
335 			}
336 			de->file_type = EXT3_FT_UNKNOWN;
337 			if (inode) {
338 				de->inode = cpu_to_le32(inode->i_ino);
339 				ext3_set_de_type(dir->i_sb, de, inode->i_mode);
340 			} else
341 				de->inode = 0;
342 			de->name_len = namelen;
343 			memcpy (de->name, name, namelen);
344 			/*
345 			 * XXX shouldn't update any times until successful
346 			 * completion of syscall, but too many callers depend
347 			 * on this.
348 			 *
349 			 * XXX similarly, too many callers depend on
350 			 * ext3_new_inode() setting the times, but error
351 			 * recovery deletes the inode, so the worst that can
352 			 * happen is that the times are slightly out of date
353 			 * and/or different from the directory change time.
354 			 */
355 			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
356 			dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
357 			dir->i_version = ++event;
358 			ext3_mark_inode_dirty(handle, dir);
359 			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
360 			ext3_journal_dirty_metadata(handle, bh);
361 			brelse(bh);
362 			return 0;
363 		}
364 		offset += le16_to_cpu(de->rec_len);
365 		de = (struct ext3_dir_entry_2 *)
366 			((char *) de + le16_to_cpu(de->rec_len));
367 	}
368 	brelse (bh);
369 	return -ENOSPC;
370 }
371 
372 /*
373  * ext3_delete_entry deletes a directory entry by merging it with the
374  * previous entry
375  */
ext3_delete_entry(handle_t * handle,struct inode * dir,struct ext3_dir_entry_2 * de_del,struct buffer_head * bh)376 static int ext3_delete_entry (handle_t *handle,
377 			      struct inode * dir,
378 			      struct ext3_dir_entry_2 * de_del,
379 			      struct buffer_head * bh)
380 {
381 	struct ext3_dir_entry_2 * de, * pde;
382 	int i;
383 
384 	i = 0;
385 	pde = NULL;
386 	de = (struct ext3_dir_entry_2 *) bh->b_data;
387 	while (i < bh->b_size) {
388 		if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
389 			return -EIO;
390 		if (de == de_del)  {
391 			BUFFER_TRACE(bh, "get_write_access");
392 			ext3_journal_get_write_access(handle, bh);
393 			if (pde)
394 				pde->rec_len =
395 					cpu_to_le16(le16_to_cpu(pde->rec_len) +
396 						    le16_to_cpu(de->rec_len));
397 			else
398 				de->inode = 0;
399 			dir->i_version = ++event;
400 			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
401 			ext3_journal_dirty_metadata(handle, bh);
402 			return 0;
403 		}
404 		i += le16_to_cpu(de->rec_len);
405 		pde = de;
406 		de = (struct ext3_dir_entry_2 *)
407 			((char *) de + le16_to_cpu(de->rec_len));
408 	}
409 	return -ENOENT;
410 }
411 
412 /*
413  * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
414  * do not perform it in these functions.  We perform it at the call site,
415  * if it is needed.
416  */
ext3_inc_count(handle_t * handle,struct inode * inode)417 static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
418 {
419 	inode->i_nlink++;
420 }
421 
/*
 * Lower the in-core link count of `inode' by one.  The inode is not
 * marked dirty here; `handle' is accepted but not used.
 */
static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
{
	inode->i_nlink -= 1;
}
426 
/*
 * Link a freshly prepared inode into the directory named by `dentry'.
 *
 * On success the inode is marked dirty, the dentry is instantiated with
 * it and 0 is returned.  On failure of either step the inode's link
 * count is dropped by one, the inode reference is released with iput()
 * and the error is returned.
 */
static int ext3_add_nondir(handle_t *handle,
		struct dentry *dentry, struct inode *inode)
{
	int err;

	err = ext3_add_entry(handle, dentry, inode);
	if (err)
		goto fail;

	err = ext3_mark_inode_dirty(handle, inode);
	if (err != 0)
		goto fail;

	d_instantiate(dentry, inode);
	return 0;

fail:
	ext3_dec_count(handle, inode);
	iput(inode);
	return err;
}
442 
443 /*
444  * By the time this is called, we already have created
445  * the directory cache entry for the new file, but it
446  * is so far negative - it has no inode.
447  *
448  * If the create succeeds, we fill in the inode information
449  * with d_instantiate().
450  */
ext3_create(struct inode * dir,struct dentry * dentry,int mode)451 static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
452 {
453 	handle_t *handle;
454 	struct inode * inode;
455 	int err;
456 
457 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
458 	if (IS_ERR(handle))
459 		return PTR_ERR(handle);
460 
461 	if (IS_SYNC(dir))
462 		handle->h_sync = 1;
463 
464 	inode = ext3_new_inode (handle, dir, mode);
465 	err = PTR_ERR(inode);
466 	if (!IS_ERR(inode)) {
467 		inode->i_op = &ext3_file_inode_operations;
468 		inode->i_fop = &ext3_file_operations;
469 		inode->i_mapping->a_ops = &ext3_aops;
470 		err = ext3_add_nondir(handle, dentry, inode);
471 	}
472 	ext3_journal_stop(handle, dir);
473 	return err;
474 }
475 
ext3_mknod(struct inode * dir,struct dentry * dentry,int mode,int rdev)476 static int ext3_mknod (struct inode * dir, struct dentry *dentry,
477 			int mode, int rdev)
478 {
479 	handle_t *handle;
480 	struct inode *inode;
481 	int err;
482 
483 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
484 	if (IS_ERR(handle))
485 		return PTR_ERR(handle);
486 
487 	if (IS_SYNC(dir))
488 		handle->h_sync = 1;
489 
490 	inode = ext3_new_inode (handle, dir, mode);
491 	err = PTR_ERR(inode);
492 	if (!IS_ERR(inode)) {
493 		init_special_inode(inode, mode, rdev);
494 		err = ext3_add_nondir(handle, dentry, inode);
495 	}
496 	ext3_journal_stop(handle, dir);
497 	return err;
498 }
499 
ext3_mkdir(struct inode * dir,struct dentry * dentry,int mode)500 static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
501 {
502 	handle_t *handle;
503 	struct inode * inode;
504 	struct buffer_head * dir_block;
505 	struct ext3_dir_entry_2 * de;
506 	int err;
507 
508 	if (dir->i_nlink >= EXT3_LINK_MAX)
509 		return -EMLINK;
510 
511 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
512 	if (IS_ERR(handle))
513 		return PTR_ERR(handle);
514 
515 	if (IS_SYNC(dir))
516 		handle->h_sync = 1;
517 
518 	inode = ext3_new_inode (handle, dir, S_IFDIR);
519 	err = PTR_ERR(inode);
520 	if (IS_ERR(inode))
521 		goto out_stop;
522 
523 	inode->i_op = &ext3_dir_inode_operations;
524 	inode->i_fop = &ext3_dir_operations;
525 	inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize;
526 	inode->i_blocks = 0;
527 	dir_block = ext3_bread (handle, inode, 0, 1, &err);
528 	if (!dir_block) {
529 		inode->i_nlink--; /* is this nlink == 0? */
530 		ext3_mark_inode_dirty(handle, inode);
531 		iput (inode);
532 		goto out_stop;
533 	}
534 	BUFFER_TRACE(dir_block, "get_write_access");
535 	ext3_journal_get_write_access(handle, dir_block);
536 	de = (struct ext3_dir_entry_2 *) dir_block->b_data;
537 	de->inode = cpu_to_le32(inode->i_ino);
538 	de->name_len = 1;
539 	de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
540 	strcpy (de->name, ".");
541 	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
542 	de = (struct ext3_dir_entry_2 *)
543 			((char *) de + le16_to_cpu(de->rec_len));
544 	de->inode = cpu_to_le32(dir->i_ino);
545 	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
546 	de->name_len = 2;
547 	strcpy (de->name, "..");
548 	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
549 	inode->i_nlink = 2;
550 	BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
551 	ext3_journal_dirty_metadata(handle, dir_block);
552 	brelse (dir_block);
553 	inode->i_mode = S_IFDIR | mode;
554 	if (dir->i_mode & S_ISGID)
555 		inode->i_mode |= S_ISGID;
556 	ext3_mark_inode_dirty(handle, inode);
557 	err = ext3_add_entry (handle, dentry, inode);
558 	if (err)
559 		goto out_no_entry;
560 	dir->i_nlink++;
561 	dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
562 	ext3_mark_inode_dirty(handle, dir);
563 	d_instantiate(dentry, inode);
564 out_stop:
565 	ext3_journal_stop(handle, dir);
566 	return err;
567 
568 out_no_entry:
569 	inode->i_nlink = 0;
570 	ext3_mark_inode_dirty(handle, inode);
571 	iput (inode);
572 	goto out_stop;
573 }
574 
575 /*
576  * routine to check that the specified directory is empty (for rmdir)
577  */
empty_dir(struct inode * inode)578 static int empty_dir (struct inode * inode)
579 {
580 	unsigned long offset;
581 	struct buffer_head * bh;
582 	struct ext3_dir_entry_2 * de, * de1;
583 	struct super_block * sb;
584 	int err;
585 
586 	sb = inode->i_sb;
587 	if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
588 	    !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
589 	    	ext3_warning (inode->i_sb, "empty_dir",
590 			      "bad directory (dir #%lu) - no data block",
591 			      inode->i_ino);
592 		return 1;
593 	}
594 	de = (struct ext3_dir_entry_2 *) bh->b_data;
595 	de1 = (struct ext3_dir_entry_2 *)
596 			((char *) de + le16_to_cpu(de->rec_len));
597 	if (le32_to_cpu(de->inode) != inode->i_ino ||
598 			!le32_to_cpu(de1->inode) ||
599 			strcmp (".", de->name) ||
600 			strcmp ("..", de1->name)) {
601 	    	ext3_warning (inode->i_sb, "empty_dir",
602 			      "bad directory (dir #%lu) - no `.' or `..'",
603 			      inode->i_ino);
604 		brelse (bh);
605 		return 1;
606 	}
607 	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
608 	de = (struct ext3_dir_entry_2 *)
609 			((char *) de1 + le16_to_cpu(de1->rec_len));
610 	while (offset < inode->i_size ) {
611 		if (!bh ||
612 			(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
613 			brelse (bh);
614 			bh = ext3_bread (NULL, inode,
615 				offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
616 			if (!bh) {
617 #if 0
618 				ext3_error (sb, "empty_dir",
619 				"directory #%lu contains a hole at offset %lu",
620 					inode->i_ino, offset);
621 #endif
622 				offset += sb->s_blocksize;
623 				continue;
624 			}
625 			de = (struct ext3_dir_entry_2 *) bh->b_data;
626 		}
627 		if (!ext3_check_dir_entry ("empty_dir", inode, de, bh,
628 					   offset)) {
629 			brelse (bh);
630 			return 1;
631 		}
632 		if (le32_to_cpu(de->inode)) {
633 			brelse (bh);
634 			return 0;
635 		}
636 		offset += le16_to_cpu(de->rec_len);
637 		de = (struct ext3_dir_entry_2 *)
638 				((char *) de + le16_to_cpu(de->rec_len));
639 	}
640 	brelse (bh);
641 	return 1;
642 }
643 
644 /* ext3_orphan_add() links an unlinked or truncated inode into a list of
645  * such inodes, starting at the superblock, in case we crash before the
646  * file is closed/deleted, or in case the inode truncate spans multiple
647  * transactions and the last transaction is not recovered after a crash.
648  *
649  * At filesystem recovery time, we walk this list deleting unlinked
650  * inodes and truncating linked inodes in ext3_orphan_cleanup().
651  */
ext3_orphan_add(handle_t * handle,struct inode * inode)652 int ext3_orphan_add(handle_t *handle, struct inode *inode)
653 {
654 	struct super_block *sb = inode->i_sb;
655 	struct ext3_iloc iloc;
656 	int err = 0, rc;
657 
658 	lock_super(sb);
659 	if (!list_empty(&inode->u.ext3_i.i_orphan))
660 		goto out_unlock;
661 
662 	/* Orphan handling is only valid for files with data blocks
663 	 * being truncated, or files being unlinked. */
664 
665 	/* @@@ FIXME: Observation from aviro:
666 	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
667 	 * here (on lock_super()), so race with ext3_link() which might bump
668 	 * ->i_nlink. For, say it, character device. Not a regular file,
669 	 * not a directory, not a symlink and ->i_nlink > 0.
670 	 */
671 	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
672 		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
673 
674 	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
675 	err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
676 	if (err)
677 		goto out_unlock;
678 
679 	err = ext3_reserve_inode_write(handle, inode, &iloc);
680 	if (err)
681 		goto out_unlock;
682 
683 	/* Insert this inode at the head of the on-disk orphan list... */
684 	NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
685 	EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
686 	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
687 	rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
688 	if (!err)
689 		err = rc;
690 
691 	/* Only add to the head of the in-memory list if all the
692 	 * previous operations succeeded.  If the orphan_add is going to
693 	 * fail (possibly taking the journal offline), we can't risk
694 	 * leaving the inode on the orphan list: stray orphan-list
695 	 * entries can cause panics at unmount time.
696 	 *
697 	 * This is safe: on error we're going to ignore the orphan list
698 	 * anyway on the next recovery. */
699 	if (!err)
700 		list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan);
701 
702 	jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
703 	jbd_debug(4, "orphan inode %ld will point to %d\n",
704 			inode->i_ino, NEXT_ORPHAN(inode));
705 out_unlock:
706 	unlock_super(sb);
707 	ext3_std_error(inode->i_sb, err);
708 	return err;
709 }
710 
711 /*
712  * ext3_orphan_del() removes an unlinked or truncated inode from the list
713  * of such inodes stored on disk, because it is finally being cleaned up.
714  */
ext3_orphan_del(handle_t * handle,struct inode * inode)715 int ext3_orphan_del(handle_t *handle, struct inode *inode)
716 {
717 	struct list_head *prev;
718 	struct ext3_sb_info *sbi;
719 	unsigned long ino_next;
720 	struct ext3_iloc iloc;
721 	int err = 0;
722 
723 	lock_super(inode->i_sb);
724 	if (list_empty(&inode->u.ext3_i.i_orphan)) {
725 		unlock_super(inode->i_sb);
726 		return 0;
727 	}
728 
729 	ino_next = NEXT_ORPHAN(inode);
730 	prev = inode->u.ext3_i.i_orphan.prev;
731 	sbi = EXT3_SB(inode->i_sb);
732 
733 	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
734 
735 	list_del(&inode->u.ext3_i.i_orphan);
736 	INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
737 
738 	/* If we're on an error path, we may not have a valid
739 	 * transaction handle with which to update the orphan list on
740 	 * disk, but we still need to remove the inode from the linked
741 	 * list in memory. */
742 	if (!handle)
743 		goto out;
744 
745 	err = ext3_reserve_inode_write(handle, inode, &iloc);
746 	if (err)
747 		goto out_err;
748 
749 	if (prev == &sbi->s_orphan) {
750 		jbd_debug(4, "superblock will point to %lu\n", ino_next);
751 		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
752 		err = ext3_journal_get_write_access(handle, sbi->s_sbh);
753 		if (err)
754 			goto out_brelse;
755 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
756 		err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
757 	} else {
758 		struct ext3_iloc iloc2;
759 		struct inode *i_prev =
760 			list_entry(prev, struct inode, u.ext3_i.i_orphan);
761 
762 		jbd_debug(4, "orphan inode %lu will point to %lu\n",
763 			  i_prev->i_ino, ino_next);
764 		err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
765 		if (err)
766 			goto out_brelse;
767 		NEXT_ORPHAN(i_prev) = ino_next;
768 		err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2);
769 	}
770 	if (err)
771 		goto out_brelse;
772 	NEXT_ORPHAN(inode) = 0;
773 	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
774 	if (err)
775 		goto out_brelse;
776 
777 out_err:
778 	ext3_std_error(inode->i_sb, err);
779 out:
780 	unlock_super(inode->i_sb);
781 	return err;
782 
783 out_brelse:
784 	brelse(iloc.bh);
785 	goto out_err;
786 }
787 
ext3_rmdir(struct inode * dir,struct dentry * dentry)788 static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
789 {
790 	int retval;
791 	struct inode * inode;
792 	struct buffer_head * bh;
793 	struct ext3_dir_entry_2 * de;
794 	handle_t *handle;
795 
796 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
797 	if (IS_ERR(handle))
798 		return PTR_ERR(handle);
799 
800 	retval = -ENOENT;
801 	bh = ext3_find_entry (dentry, &de);
802 	if (!bh)
803 		goto end_rmdir;
804 
805 	if (IS_SYNC(dir))
806 		handle->h_sync = 1;
807 
808 	inode = dentry->d_inode;
809 	DQUOT_INIT(inode);
810 
811 	retval = -EIO;
812 	if (le32_to_cpu(de->inode) != inode->i_ino)
813 		goto end_rmdir;
814 
815 	retval = -ENOTEMPTY;
816 	if (!empty_dir (inode))
817 		goto end_rmdir;
818 
819 	retval = ext3_delete_entry(handle, dir, de, bh);
820 	if (retval)
821 		goto end_rmdir;
822 	if (inode->i_nlink != 2)
823 		ext3_warning (inode->i_sb, "ext3_rmdir",
824 			      "empty directory has nlink!=2 (%d)",
825 			      inode->i_nlink);
826 	inode->i_version = ++event;
827 	inode->i_nlink = 0;
828 	/* There's no need to set i_disksize: the fact that i_nlink is
829 	 * zero will ensure that the right thing happens during any
830 	 * recovery. */
831 	inode->i_size = 0;
832 	ext3_orphan_add(handle, inode);
833 	dir->i_nlink--;
834 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
835 	ext3_mark_inode_dirty(handle, inode);
836 	dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
837 	ext3_mark_inode_dirty(handle, dir);
838 
839 end_rmdir:
840 	ext3_journal_stop(handle, dir);
841 	brelse (bh);
842 	return retval;
843 }
844 
ext3_unlink(struct inode * dir,struct dentry * dentry)845 static int ext3_unlink(struct inode * dir, struct dentry *dentry)
846 {
847 	int retval;
848 	struct inode * inode;
849 	struct buffer_head * bh;
850 	struct ext3_dir_entry_2 * de;
851 	handle_t *handle;
852 
853 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
854 	if (IS_ERR(handle))
855 		return PTR_ERR(handle);
856 
857 	if (IS_SYNC(dir))
858 		handle->h_sync = 1;
859 
860 	retval = -ENOENT;
861 	bh = ext3_find_entry (dentry, &de);
862 	if (!bh)
863 		goto end_unlink;
864 
865 	inode = dentry->d_inode;
866 	DQUOT_INIT(inode);
867 
868 	retval = -EIO;
869 	if (le32_to_cpu(de->inode) != inode->i_ino)
870 		goto end_unlink;
871 
872 	if (!inode->i_nlink) {
873 		ext3_warning (inode->i_sb, "ext3_unlink",
874 			      "Deleting nonexistent file (%lu), %d",
875 			      inode->i_ino, inode->i_nlink);
876 		inode->i_nlink = 1;
877 	}
878 	retval = ext3_delete_entry(handle, dir, de, bh);
879 	if (retval)
880 		goto end_unlink;
881 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
882 	dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
883 	ext3_mark_inode_dirty(handle, dir);
884 	inode->i_nlink--;
885 	if (!inode->i_nlink)
886 		ext3_orphan_add(handle, inode);
887 	inode->i_ctime = dir->i_ctime;
888 	ext3_mark_inode_dirty(handle, inode);
889 	retval = 0;
890 
891 end_unlink:
892 	ext3_journal_stop(handle, dir);
893 	brelse (bh);
894 	return retval;
895 }
896 
ext3_symlink(struct inode * dir,struct dentry * dentry,const char * symname)897 static int ext3_symlink (struct inode * dir,
898 		struct dentry *dentry, const char * symname)
899 {
900 	handle_t *handle;
901 	struct inode * inode;
902 	int l, err;
903 
904 	l = strlen(symname)+1;
905 	if (l > dir->i_sb->s_blocksize)
906 		return -ENAMETOOLONG;
907 
908 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
909 	if (IS_ERR(handle))
910 		return PTR_ERR(handle);
911 
912 	if (IS_SYNC(dir))
913 		handle->h_sync = 1;
914 
915 	inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
916 	err = PTR_ERR(inode);
917 	if (IS_ERR(inode))
918 		goto out_stop;
919 
920 	if (l > sizeof (inode->u.ext3_i.i_data)) {
921 		inode->i_op = &page_symlink_inode_operations;
922 		inode->i_mapping->a_ops = &ext3_aops;
923 		/*
924 		 * block_symlink() calls back into ext3_prepare/commit_write.
925 		 * We have a transaction open.  All is sweetness.  It also sets
926 		 * i_size in generic_commit_write().
927 		 */
928 		err = block_symlink(inode, symname, l);
929 		if (err)
930 			goto out_no_entry;
931 	} else {
932 		inode->i_op = &ext3_fast_symlink_inode_operations;
933 		memcpy((char*)&inode->u.ext3_i.i_data,symname,l);
934 		inode->i_size = l-1;
935 	}
936 	inode->u.ext3_i.i_disksize = inode->i_size;
937 	err = ext3_add_nondir(handle, dentry, inode);
938 out_stop:
939 	ext3_journal_stop(handle, dir);
940 	return err;
941 
942 out_no_entry:
943 	ext3_dec_count(handle, inode);
944 	ext3_mark_inode_dirty(handle, inode);
945 	iput (inode);
946 	goto out_stop;
947 }
948 
ext3_link(struct dentry * old_dentry,struct inode * dir,struct dentry * dentry)949 static int ext3_link (struct dentry * old_dentry,
950 		struct inode * dir, struct dentry *dentry)
951 {
952 	handle_t *handle;
953 	struct inode *inode = old_dentry->d_inode;
954 	int err;
955 
956 	if (S_ISDIR(inode->i_mode))
957 		return -EPERM;
958 
959 	if (inode->i_nlink >= EXT3_LINK_MAX)
960 		return -EMLINK;
961 
962 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
963 	if (IS_ERR(handle))
964 		return PTR_ERR(handle);
965 
966 	if (IS_SYNC(dir))
967 		handle->h_sync = 1;
968 
969 	inode->i_ctime = CURRENT_TIME;
970 	ext3_inc_count(handle, inode);
971 	atomic_inc(&inode->i_count);
972 
973 	err = ext3_add_nondir(handle, dentry, inode);
974 	ext3_journal_stop(handle, dir);
975 	return err;
976 }
977 
/*
 * PARENT_INO(buffer): the raw on-disk inode field of the second entry
 * in the directory block at `buffer', found by skipping rec_len bytes
 * of the first entry.  ext3_mkdir() lays a directory's first block out
 * as "." followed by "..", so this is the ".." entry.  The value is not
 * byte-swapped; the expansion is an lvalue.
 *
 * `buffer' is parenthesised so that an expression argument is cast as
 * a whole.
 */
#define PARENT_INO(buffer) \
	((struct ext3_dir_entry_2 *) ((char *) (buffer) + \
	le16_to_cpu(((struct ext3_dir_entry_2 *) (buffer))->rec_len)))->inode
981 
982 /*
983  * Anybody can rename anything with this: the permission checks are left to the
984  * higher-level routines.
985  */
ext3_rename(struct inode * old_dir,struct dentry * old_dentry,struct inode * new_dir,struct dentry * new_dentry)986 static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
987 			   struct inode * new_dir,struct dentry *new_dentry)
988 {
989 	handle_t *handle;
990 	struct inode * old_inode, * new_inode;
991 	struct buffer_head * old_bh, * new_bh, * dir_bh;
992 	struct ext3_dir_entry_2 * old_de, * new_de;
993 	int retval;
994 
995 	old_bh = new_bh = dir_bh = NULL;
996 
997 	handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
998 	if (IS_ERR(handle))
999 		return PTR_ERR(handle);
1000 
1001 	if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
1002 		handle->h_sync = 1;
1003 
1004 	old_bh = ext3_find_entry (old_dentry, &old_de);
1005 	/*
1006 	 *  Check for inode number is _not_ due to possible IO errors.
1007 	 *  We might rmdir the source, keep it as pwd of some process
1008 	 *  and merrily kill the link to whatever was created under the
1009 	 *  same name. Goodbye sticky bit ;-<
1010 	 */
1011 	old_inode = old_dentry->d_inode;
1012 	retval = -ENOENT;
1013 	if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
1014 		goto end_rename;
1015 
1016 	new_inode = new_dentry->d_inode;
1017 	new_bh = ext3_find_entry (new_dentry, &new_de);
1018 	if (new_bh) {
1019 		if (!new_inode) {
1020 			brelse (new_bh);
1021 			new_bh = NULL;
1022 		} else {
1023 			DQUOT_INIT(new_inode);
1024 		}
1025 	}
1026 	if (S_ISDIR(old_inode->i_mode)) {
1027 		if (new_inode) {
1028 			retval = -ENOTEMPTY;
1029 			if (!empty_dir (new_inode))
1030 				goto end_rename;
1031 		}
1032 		retval = -EIO;
1033 		dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
1034 		if (!dir_bh)
1035 			goto end_rename;
1036 		if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
1037 			goto end_rename;
1038 		retval = -EMLINK;
1039 		if (!new_inode && new_dir!=old_dir &&
1040 				new_dir->i_nlink >= EXT3_LINK_MAX)
1041 			goto end_rename;
1042 	}
1043 	if (!new_bh) {
1044 		retval = ext3_add_entry (handle, new_dentry, old_inode);
1045 		if (retval)
1046 			goto end_rename;
1047 	} else {
1048 		BUFFER_TRACE(new_bh, "get write access");
1049 		BUFFER_TRACE(new_bh, "get_write_access");
1050 		ext3_journal_get_write_access(handle, new_bh);
1051 		new_de->inode = le32_to_cpu(old_inode->i_ino);
1052 		if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
1053 					      EXT3_FEATURE_INCOMPAT_FILETYPE))
1054 			new_de->file_type = old_de->file_type;
1055 		new_dir->i_version = ++event;
1056 		BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
1057 		ext3_journal_dirty_metadata(handle, new_bh);
1058 		brelse(new_bh);
1059 		new_bh = NULL;
1060 	}
1061 
1062 	/*
1063 	 * Like most other Unix systems, set the ctime for inodes on a
1064 	 * rename.
1065 	 */
1066 	old_inode->i_ctime = CURRENT_TIME;
1067 	ext3_mark_inode_dirty(handle, old_inode);
1068 
1069 	/*
1070 	 * ok, that's it
1071 	 */
1072 	ext3_delete_entry(handle, old_dir, old_de, old_bh);
1073 
1074 	if (new_inode) {
1075 		new_inode->i_nlink--;
1076 		new_inode->i_ctime = CURRENT_TIME;
1077 	}
1078 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1079 	old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1080 	if (dir_bh) {
1081 		BUFFER_TRACE(dir_bh, "get_write_access");
1082 		ext3_journal_get_write_access(handle, dir_bh);
1083 		PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
1084 		BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
1085 		ext3_journal_dirty_metadata(handle, dir_bh);
1086 		old_dir->i_nlink--;
1087 		if (new_inode) {
1088 			new_inode->i_nlink--;
1089 		} else {
1090 			new_dir->i_nlink++;
1091 			new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
1092 			ext3_mark_inode_dirty(handle, new_dir);
1093 		}
1094 	}
1095 	ext3_mark_inode_dirty(handle, old_dir);
1096 	if (new_inode) {
1097 		ext3_mark_inode_dirty(handle, new_inode);
1098 		if (!new_inode->i_nlink)
1099 			ext3_orphan_add(handle, new_inode);
1100 	}
1101 	retval = 0;
1102 
1103 end_rename:
1104 	brelse (dir_bh);
1105 	brelse (old_bh);
1106 	brelse (new_bh);
1107 	ext3_journal_stop(handle, old_dir);
1108 	return retval;
1109 }
1110 
1111 /*
1112  * directories can handle most operations...
1113  */
1114 struct inode_operations ext3_dir_inode_operations = {
1115 	create:		ext3_create,		/* BKL held */
1116 	lookup:		ext3_lookup,		/* BKL held */
1117 	link:		ext3_link,		/* BKL held */
1118 	unlink:		ext3_unlink,		/* BKL held */
1119 	symlink:	ext3_symlink,		/* BKL held */
1120 	mkdir:		ext3_mkdir,		/* BKL held */
1121 	rmdir:		ext3_rmdir,		/* BKL held */
1122 	mknod:		ext3_mknod,		/* BKL held */
1123 	rename:		ext3_rename,		/* BKL held */
1124 };
1125