/*
 * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/reiserfs_fs.h>
#include <linux/locks.h>
#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>

/* args for the create parameter of reiserfs_get_block */
#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
#define GET_BLOCK_CREATE 1    /* add anything you need to find block */
#define GET_BLOCK_NO_HOLE 2   /* return -ENOENT for file holes */
#define GET_BLOCK_READ_DIRECT 4  /* read the tail if indirect item not found */
#define GET_BLOCK_NO_ISEM     8 /* i_sem is not held, don't preallocate */
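
/* These flags form a bitmask and may be OR'd together.  For example
** (drawn from the callers below): reiserfs_get_block_create_0() passes
** GET_BLOCK_NO_HOLE, and when create lacks GET_BLOCK_CREATE,
** reiserfs_get_block() forwards create | GET_BLOCK_READ_DIRECT to
** _get_block_create_0().
*/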

static int reiserfs_get_block (struct inode * inode, long block,
			       struct buffer_head * bh_result, int create);

/* This spinlock guards the inode pkey in the private part of the inode
   against races between reiserfs_find_actor() and reiserfs_read_inode2() */
static spinlock_t keycopy_lock = SPIN_LOCK_UNLOCKED;

void reiserfs_delete_inode (struct inode * inode)
{
    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2;
    int windex ;
    struct reiserfs_transaction_handle th ;


    lock_kernel() ;

    /* The objectid == 0 case happens when we abort creating a new inode
       for some reason, such as lack of space. */
    if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
	down (&inode->i_sem);

	journal_begin(&th, inode->i_sb, jbegin_count) ;
	reiserfs_update_inode_transaction(inode) ;
	windex = push_journal_writer("delete_inode") ;

	reiserfs_delete_object (&th, inode);
	pop_journal_writer(windex) ;

	journal_end(&th, inode->i_sb, jbegin_count) ;

        up (&inode->i_sem);

        /* all items of the file are deleted, so we can remove the "save" link */
	remove_save_link (inode, 0/* not truncate */);
    } else {
	/* no object items are in the tree */
	;
    }
    clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
    inode->i_blocks = 0;
    unlock_kernel() ;
}

static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid,
	       loff_t offset, int type, int length )
{
    key->version = version;

    key->on_disk_key.k_dir_id = dirid;
    key->on_disk_key.k_objectid = objectid;
    set_cpu_key_k_offset (key, offset);
    set_cpu_key_k_type (key, type);
    key->key_length = length;
}


/* take the base of the key (dirid, objectid) and the version from the
   inode, and set the offset and type of the key */
void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset,
	      int type, int length )
{
  _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
		 le32_to_cpu (INODE_PKEY (inode)->k_objectid),
		 offset, type, length);
}
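
/* Example (illustrative): _get_block_create_0() below builds the key for
   the first byte of the 'block'-th block of a file as

	make_cpu_key (&key, inode,
		      (loff_t)block * inode->i_sb->s_blocksize + 1,
		      TYPE_ANY, 3);

   i.e. key offsets count file bytes starting from 1, not 0. */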


//
// when key is NULL, the short key (dir_id, objectid) is left unset;
// the version, offset, type and length are always set
//
inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key,
			       int version,
			       loff_t offset, int type, int length,
			       int entry_count/*or ih_free_space*/)
{
    if (key) {
	ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id);
	ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid);
    }
    put_ih_version( ih, version );
    set_le_ih_k_offset (ih, offset);
    set_le_ih_k_type (ih, type);
    put_ih_item_len( ih, length );
    /*    set_ih_free_space (ih, 0);*/
    // for directory items this is the entry count; for direct and stat
    // data items it is 0xffff, and for indirect items it is 0
    put_ih_entry_count( ih, entry_count );
}

static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
    struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;

    buffer_insert_list(bh, &j->j_dirty_buffers) ;
}

//
// FIXME: we might cache recently accessed indirect item

// Ugh.  Not too eager for that....
//  I cut the code until such time as I see a convincing argument (benchmark).
// I don't want a bloated inode struct..., and I don't like code complexity....

/* cutting the code is fine, since it really isn't in use yet and is easy
** to add back in.  But, Vladimir has a really good idea here.  Think
** about what happens for reading a file.  For each page,
** the VFS layer calls reiserfs_readpage, which searches the tree to find
** an indirect item.  This indirect item has X number of pointers, where
** X is a big number if we've done the block allocation right.  But,
** we only use one or two of these pointers during each call to readpage,
** needlessly searching the tree again later on.
**
** The size of the cache could be dynamic based on the size of the file.
**
** I'd also like to see us cache the location of the stat data item, since
** we are needlessly searching for that frequently.
**
** --chris
*/

/* If this page has a file tail in it, and
** it was read in by get_block_create_0, the page data is valid,
** but the tail is still sitting in a direct item, and we can't write to
** it.  So, look through this page, and check all the mapped buffers
** to make sure they have valid block numbers.  Any that don't need
** to be unmapped, so that block_prepare_write will correctly call
** reiserfs_get_block to convert the tail into an unformatted node
*/
static inline void fix_tail_page_for_writing(struct page *page) {
    struct buffer_head *head, *next, *bh ;

    if (page && page->buffers) {
	head = page->buffers ;
	bh = head ;
	do {
	    next = bh->b_this_page ;
	    if (buffer_mapped(bh) && bh->b_blocknr == 0) {
	        reiserfs_unmap_buffer(bh) ;
	    }
	    bh = next ;
	} while (bh != head) ;
    }
}

/* reiserfs_get_block does not need to allocate a block if one has already
   been allocated or a non-hole position has been found in the indirect item */
static inline int allocation_needed (int retval, b_blocknr_t allocated,
				     struct item_head * ih,
				     __u32 * item, int pos_in_item)
{
  if (allocated)
	 return 0;
  if (retval == POSITION_FOUND && is_indirect_le_ih (ih) &&
      get_block_num(item, pos_in_item))
	 return 0;
  return 1;
}

static inline int indirect_item_found (int retval, struct item_head * ih)
{
  return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
}


static inline void set_block_dev_mapped (struct buffer_head * bh,
					 b_blocknr_t block, struct inode * inode)
{
  bh->b_dev = inode->i_dev;
  bh->b_blocknr = block;
  bh->b_state |= (1UL << BH_Mapped);
}


//
// files created by the old (3.5) format cannot be larger than 2GB
//
static int file_capable (struct inode * inode, long block)
{
    if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is a new-format file
	block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside the 2GB limit
	return 1;

    return 0;
}
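
/* Worked example: with a 4KB block size, s_blocksize_bits is 12, so the
   limit above is 1 << (31 - 12) == 524288 blocks, and 524288 blocks of
   4096 bytes each is exactly 2^31 bytes == 2GB. */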
203 
restart_transaction(struct reiserfs_transaction_handle * th,struct inode * inode,struct path * path)204 /*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
205 				struct inode *inode, struct path *path) {
206   struct super_block *s = th->t_super ;
207   int len = th->t_blocks_allocated ;
208 
209   pathrelse(path) ;
210   reiserfs_update_sd(th, inode) ;
211   journal_end(th, s, len) ;
212   journal_begin(th, s, len) ;
213   reiserfs_update_inode_transaction(inode) ;
214 }

// This is called by reiserfs_get_block when create == 0.  It returns the
// block number for the 'block'-th logical block of the file.  When it hits
// a direct item it either returns 0 (when called from bmap, which cannot
// map tails) or reads the direct item into the relevant piece of the page
// (bh_result).

static int _get_block_create_0 (struct inode * inode, long block,
				 struct buffer_head * bh_result,
				 int args)
{
    INITIALIZE_PATH (path);
    struct cpu_key key;
    struct buffer_head * bh;
    struct item_head * ih, tmp_ih;
    int fs_gen ;
    int blocknr;
    char * p = NULL;
    int chars;
    int ret ;
    int done = 0 ;
    unsigned long offset ;

    // prepare the key to look for the 'block'-th block of the file
    make_cpu_key (&key, inode,
		  (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);

research:
    if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
	pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
	// We do not return -ENOENT if there is a hole but the page is uptodate,
	// because that means there is some mmapped data associated with it
	// that is yet to be written to disk.
	if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
	    return -ENOENT ;
	}
        return 0 ;
    }

    //
    bh = get_last_bh (&path);
    ih = get_ih (&path);
    if (is_indirect_le_ih (ih)) {
	__u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);

	/* FIXME: here we could cache the indirect item or part of it in
	   the inode to avoid search_by_key in case of subsequent
	   access to the file */
	blocknr = get_block_num(ind_item, path.pos_in_item) ;
	ret = 0 ;
	if (blocknr) {
	    bh_result->b_dev = inode->i_dev;
	    bh_result->b_blocknr = blocknr;
	    bh_result->b_state |= (1UL << BH_Mapped);
	} else
	    // We do not return -ENOENT if there is a hole but the page is uptodate,
	    // because that means there is some mmapped data associated with it
	    // that is yet to be written to disk.
	    if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
		ret = -ENOENT ;
	    }

	pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
	return ret ;
    }

    // requested data are in direct item(s)
    if (!(args & GET_BLOCK_READ_DIRECT)) {
	// we are called by bmap. FIXME: we cannot map a block of the file
	// when it is stored in direct item(s)
	pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
	return -ENOENT;
    }

    /* if we've got a direct item, and the buffer was uptodate,
    ** we don't want to pull data off disk again.  skip to the
    ** end, where we map the buffer and return
    */
    if (buffer_uptodate(bh_result)) {
        goto finished ;
    } else
	/*
	** grab_tail_page can trigger calls to reiserfs_get_block on up to date
	** pages without any buffers.  If the page is up to date, we don't want
	** to read old data off disk.  Set the up to date bit on the buffer instead
	** and jump to the end
	*/
	    if (Page_Uptodate(bh_result->b_page)) {
		mark_buffer_uptodate(bh_result, 1);
		goto finished ;
    }

    // read the file tail into part of the page
    offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
    fs_gen = get_generation(inode->i_sb) ;
    copy_item_head (&tmp_ih, ih);

    /* we only want to kmap if we are reading the tail into the page.
    ** this is not the common case, so we don't kmap until we are
    ** sure we need to.  But, this means the item might move if
    ** kmap schedules
    */
    if (!p) {
	p = (char *)kmap(bh_result->b_page) ;
	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
	    goto research;
	}
    }
    p += offset ;
    memset (p, 0, inode->i_sb->s_blocksize);
    do {
	if (!is_direct_le_ih (ih)) {
	    BUG ();
        }
	/* make sure we don't read more bytes than actually exist in
	** the file.  This can happen in odd cases where i_size isn't
	** correct, and when direct item padding results in a few
	** extra bytes at the end of the direct item
	*/
        if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
	    break ;
	if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
	    chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
	    done = 1 ;
	} else {
	    chars = ih_item_len(ih) - path.pos_in_item;
	}
	memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);

	if (done)
	    break ;

	p += chars;

	if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
	    // we are done if the direct item we read is not the last item
	    // in the node.  FIXME: we could try to check the right delimiting
	    // key to see whether the direct item continues in the right
	    // neighbor, or rely on i_size
	    break;

	// update the key to look for the next piece
	set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
	if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
	    // we already read something from the tail, so stop even if we
	    // got IO_ERROR now
	    break;
	bh = get_last_bh (&path);
	ih = get_ih (&path);
    } while (1);

    flush_dcache_page(bh_result->b_page) ;
    kunmap(bh_result->b_page) ;

finished:
    pathrelse (&path);
    bh_result->b_blocknr = 0 ;
    bh_result->b_dev = inode->i_dev;
    mark_buffer_uptodate (bh_result, 1);
    bh_result->b_state |= (1UL << BH_Mapped);
    return 0;
}


// this is called to create the file map, so _get_block_create_0 will not
// read direct items
int reiserfs_bmap (struct inode * inode, long block,
		   struct buffer_head * bh_result, int create)
{
    if (!file_capable (inode, block))
	return -EFBIG;

    lock_kernel() ;
    /* do not read the direct item */
    _get_block_create_0 (inode, block, bh_result, 0) ;
    unlock_kernel() ;
    return 0;
}

/* special version of get_block that is only used by grab_tail_page right
** now.  It is sent to block_prepare_write, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer.  block_prepare_write expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows block_prepare_write to be used for reading a single block
** in a page, where it does not produce a valid page for holes, or past the
** end of the file.  This turns out to be exactly what we need for reading
** tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1.  If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
static int reiserfs_get_block_create_0 (struct inode * inode, long block,
			struct buffer_head * bh_result, int create) {
    return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
}

static int reiserfs_get_block_direct_io (struct inode * inode, long block,
			struct buffer_head * bh_result, int create) {
    int ret ;

    bh_result->b_page = NULL;
    ret = reiserfs_get_block(inode, block, bh_result, create) ;

    /* don't allow direct io onto tail pages */
    if (ret == 0 && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
	/* make sure future calls to the direct io funcs for this offset
	** in the file fail by unmapping the buffer
	*/
	reiserfs_unmap_buffer(bh_result);
        ret = -EINVAL ;
    }
    /* Possibly an unpacked tail.  Flush the data before the pages
       disappear */
    if (inode->u.reiserfs_i.i_flags & i_pack_on_close_mask) {
	lock_kernel();
	reiserfs_commit_for_inode(inode);
	inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
	unlock_kernel();
    }
    return ret ;
}


/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item.
** bh_result is the buffer head for the hole.
** tail_offset is the offset of the start of the tail in the file.
**
** This calls prepare_write, which will start a new transaction;
** you should not be in a transaction, or have any paths held, when you
** call this.
*/
static int convert_tail_for_hole(struct inode *inode,
                                 struct buffer_head *bh_result,
				 loff_t tail_offset) {
    unsigned long index ;
    unsigned long tail_end ;
    unsigned long tail_start ;
    struct page * tail_page ;
    struct page * hole_page = bh_result->b_page ;
    int retval = 0 ;

    if ((tail_offset & (bh_result->b_size - 1)) != 1)
        return -EIO ;

    /* always try to read until the end of the block */
    tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
    tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
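
    /* Worked example (assumed sizes): with 1K blocks and 4K pages, a tail
    ** starting at file offset 5121 (byte 1 of the file's sixth 1K block)
    ** passes the alignment check above (5121 & 1023 == 1) and gives
    ** tail_start == 5121 & 4095 == 1025 and tail_end == (1025 | 1023) + 1
    ** == 2048, i.e. the prepare_write below covers the second 1K block of
    ** page index 5121 >> PAGE_CACHE_SHIFT == 1.
    */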

    index = tail_offset >> PAGE_CACHE_SHIFT ;
    if ( !hole_page || index != hole_page->index) {
	tail_page = grab_cache_page(inode->i_mapping, index) ;
	retval = -ENOMEM;
	if (!tail_page) {
	    goto out ;
	}
    } else {
        tail_page = hole_page ;
    }

    /* we don't have to make sure the conversion did not happen while
    ** we were locking the page because anyone that could convert
    ** must first take i_sem.
    **
    ** We must fix the tail page for writing because it might have buffers
    ** that are mapped, but have a block number of 0.  This indicates tail
    ** data that has been read directly into the page, and block_prepare_write
    ** won't trigger a get_block in this case.
    */
    fix_tail_page_for_writing(tail_page) ;
    retval = block_prepare_write(tail_page, tail_start, tail_end,
                                 reiserfs_get_block) ;
    if (retval)
        goto unlock ;

    /* tail conversion might change the data in the page */
    flush_dcache_page(tail_page) ;

    retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ;

unlock:
    if (tail_page != hole_page) {
        UnlockPage(tail_page) ;
	page_cache_release(tail_page) ;
    }
out:
    return retval ;
}

static inline int _allocate_block(struct reiserfs_transaction_handle *th,
			   long block,
                           struct inode *inode,
			   b_blocknr_t *allocated_block_nr,
			   struct path * path,
			   int flags) {

#ifdef REISERFS_PREALLOCATE
    if (!(flags & GET_BLOCK_NO_ISEM)) {
        return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block);
    }
#endif
    return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block);
}

static int reiserfs_get_block (struct inode * inode, long block,
			       struct buffer_head * bh_result, int create)
{
    int repeat, retval;
    b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long
    INITIALIZE_PATH(path);
    int pos_in_item;
    struct cpu_key key;
    struct buffer_head * bh, * unbh = 0;
    struct item_head * ih, tmp_ih;
    __u32 * item;
    int done;
    int fs_gen;
    int windex ;
    struct reiserfs_transaction_handle th ;
    /* space reserved in transaction batch:
        . 3 balancings in direct->indirect conversion
        . 1 block involved in reiserfs_update_sd()
       XXX in the practically impossible worst case direct2indirect()
       can incur (much) more than 3 balancings. */
    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
    int version;
    int transaction_started = 0 ;
    loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;

				/* bad.... */
    lock_kernel() ;
    th.t_trans_id = 0 ;
    version = get_inode_item_key_version (inode);

    if (block < 0) {
	unlock_kernel();
	return -EIO;
    }

    if (!file_capable (inode, block)) {
	unlock_kernel() ;
	return -EFBIG;
    }

    /* if !create, we aren't changing the FS, so we don't need to
    ** log anything, so we don't need to start a transaction
    */
    if (!(create & GET_BLOCK_CREATE)) {
	int ret ;
	/* find the number of the block-th logical block of the file */
	ret = _get_block_create_0 (inode, block, bh_result,
	                           create | GET_BLOCK_READ_DIRECT) ;
	unlock_kernel() ;
	return ret;
    }

    /* If the file is small enough that it might have a tail, and tails are
    ** enabled, we should mark it as possibly needing tail packing on close
    */
    if ( (have_large_tails (inode->i_sb) && inode->i_size < block_size (inode)*4) ||
	 (have_small_tails (inode->i_sb) && inode->i_size < block_size(inode)) )
	inode->u.reiserfs_i.i_flags |= i_pack_on_close_mask;

    windex = push_journal_writer("reiserfs_get_block") ;

    /* set the key of the first byte in the 'block'-th block of the file */
    make_cpu_key (&key, inode, new_offset,
		  TYPE_ANY, 3/*key length*/);
    if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
	journal_begin(&th, inode->i_sb, jbegin_count) ;
	reiserfs_update_inode_transaction(inode) ;
	transaction_started = 1 ;
    }
 research:

    retval = search_for_position_by_key (inode->i_sb, &key, &path);
    if (retval == IO_ERROR) {
	retval = -EIO;
	goto failure;
    }

    bh = get_last_bh (&path);
    ih = get_ih (&path);
    item = get_item (&path);
    pos_in_item = path.pos_in_item;

    fs_gen = get_generation (inode->i_sb);
    copy_item_head (&tmp_ih, ih);

    if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
	/* we have to allocate a block for the unformatted node */
	if (!transaction_started) {
	    pathrelse(&path) ;
	    journal_begin(&th, inode->i_sb, jbegin_count) ;
	    reiserfs_update_inode_transaction(inode) ;
	    transaction_started = 1 ;
	    goto research ;
	}

	repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create);

	if (repeat == NO_DISK_SPACE) {
	    /* restart the transaction to give the journal a chance to free
	    ** some blocks.  This releases the path, so we have to go back
	    ** to research if we succeed on the second try
	    */
	    restart_transaction(&th, inode, &path) ;
	    repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create);

	    if (repeat != NO_DISK_SPACE) {
		goto research ;
	    }
	    retval = -ENOSPC;
	    goto failure;
	}

	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
	    goto research;
	}
    }

    if (indirect_item_found (retval, ih)) {
        b_blocknr_t unfm_ptr;
	/* the 'block'-th block is in the file already (there is a
	   corresponding cell in some indirect item). But it may be a
	   zero unformatted node pointer (a hole) */
        unfm_ptr = get_block_num (item, pos_in_item);
	if (unfm_ptr == 0) {
	    /* use the allocated block to plug the hole */
	    reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
	    if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
		reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
		goto research;
	    }
	    bh_result->b_state |= (1UL << BH_New);
	    put_block_num(item, pos_in_item, allocated_block_nr) ;
            unfm_ptr = allocated_block_nr;
	    journal_mark_dirty (&th, inode->i_sb, bh);
	    inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
	    reiserfs_update_sd(&th, inode) ;
	}
	set_block_dev_mapped(bh_result, unfm_ptr, inode);
	pathrelse (&path);
	pop_journal_writer(windex) ;
	if (transaction_started)
	    journal_end(&th, inode->i_sb, jbegin_count) ;

	unlock_kernel() ;

	/* the item was found, so no new blocks were added to the file;
	** there is no need to make sure the inode is updated with this
	** transaction
	*/
	return 0;
    }

    if (!transaction_started) {
	/* if we don't pathrelse, we could vs-3050 on the buffer if
	** someone is waiting for it (they can't finish until the buffer
	** is released, and we can't start a new transaction until they
	** finish)
	*/
	pathrelse(&path) ;
	journal_begin(&th, inode->i_sb, jbegin_count) ;
	reiserfs_update_inode_transaction(inode) ;
	transaction_started = 1 ;
	goto research;
    }

    /* the desired position is not found or is in a direct item. We have
       to append the file with holes up to the 'block'-th block, converting
       direct items to an indirect one if necessary */
    done = 0;
    do {
	if (is_statdata_le_ih (ih)) {
	    __u32 unp = 0;
	    struct cpu_key tmp_key;

	    /* an indirect item has to be inserted */
	    make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT,
			       UNFM_P_SIZE, 0/* free_space */);

	    if (cpu_key_k_offset (&key) == 1) {
		/* we are going to add the 'block'-th block to the file. Use
		   the allocated block for that */
		unp = cpu_to_le32 (allocated_block_nr);
		set_block_dev_mapped (bh_result, allocated_block_nr, inode);
		bh_result->b_state |= (1UL << BH_New);
		done = 1;
	    }
	    tmp_key = key; // ;)
	    set_cpu_key_k_offset (&tmp_key, 1);
	    PATH_LAST_POSITION(&path) ++;

	    retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
	    if (retval) {
		reiserfs_free_block (&th, allocated_block_nr);
		goto failure; // retval == -ENOSPC or -EIO or -EEXIST
	    }
	    if (unp)
		inode->i_blocks += inode->i_sb->s_blocksize / 512;
	    //mark_tail_converted (inode);
	} else if (is_direct_le_ih (ih)) {
	    /* the direct item has to be converted */
	    loff_t tail_offset;

	    tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
	    if (tail_offset == cpu_key_k_offset (&key)) {
		/* the direct item we just found fits into the block we have
                   to map. Convert it into an unformatted node: use
                   bh_result for the conversion */
		set_block_dev_mapped (bh_result, allocated_block_nr, inode);
		unbh = bh_result;
		done = 1;
	    } else {
		/* we have to pad the file tail stored in direct item(s)
		   up to the block size and convert it to an unformatted
		   node. FIXME: this should also get into the page cache */

		pathrelse(&path) ;
		journal_end(&th, inode->i_sb, jbegin_count) ;
		transaction_started = 0 ;

		retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
		if (retval) {
		    if ( retval != -ENOSPC )
			reiserfs_warning(inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
		    if (allocated_block_nr) {
			/* the bitmap, the super, and the stat data == 3 */
			journal_begin(&th, inode->i_sb, 3) ;
			reiserfs_free_block (&th, allocated_block_nr);
			transaction_started = 1 ;
		    }
		    goto failure ;
		}
		goto research ;
	    }
	    retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
	    if (retval) {
		reiserfs_unmap_buffer(unbh);
		reiserfs_free_block (&th, allocated_block_nr);
		goto failure;
	    }
	    /* it is important that mark_buffer_uptodate is done after
	    ** the direct2indirect.  The buffer might contain valid
	    ** data newer than the data on disk (read by readpage, changed,
	    ** and then sent here by writepage).  direct2indirect needs
	    ** to know if unbh was already up to date, so it can decide
	    ** if the data in unbh needs to be replaced with data from
	    ** the disk
	    */
	    mark_buffer_uptodate (unbh, 1);

	    /* unbh->b_page == NULL in the case of a DIRECT_IO request; this
	       means the buffer will disappear shortly, so it should not be
	       added to any of our lists.
	    */
	    if ( unbh->b_page ) {
		/* we've converted the tail, so we must
		** flush unbh before the transaction commits
		*/
		add_to_flushlist(inode, unbh) ;

		/* mark it dirty now to prevent commit_write from adding
		 ** this buffer to the inode's dirty buffer list
		 */
		__mark_buffer_dirty(unbh) ;
	    }

	    //inode->i_blocks += inode->i_sb->s_blocksize / 512;
	    //mark_tail_converted (inode);
	} else {
	    /* append the indirect item with holes if needed; when appending
	       the pointer to the 'block'-th block, use the block which is
	       already allocated */
	    struct cpu_key tmp_key;
	    unp_t unf_single=0; // We use this in case we need to allocate
				// only one block, which is a fastpath
	    unp_t *un;
	    __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
	    __u64 blocks_needed;

	    RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
		    "vs-804: invalid position for append");
	    /* the indirect item has to be appended; set up the key of that position */
	    make_cpu_key (&tmp_key, inode,
			  le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
			  //pos_in_item * inode->i_sb->s_blocksize,
			  TYPE_INDIRECT, 3);// key type is unimportant

	    blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
	    RFALSE( blocks_needed < 0, "green-805: invalid offset");

	    if ( blocks_needed == 1 ) {
		un = &unf_single;
	    } else {
		un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
			    GFP_ATOMIC); // We need to avoid scheduling.
		if ( !un) {
		    un = &unf_single;
		    blocks_needed = 1;
		    max_to_insert = 0;
		} else
		    memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
	    }
	    if ( blocks_needed <= max_to_insert) {
		/* we are going to add the target block to the file. Use the
		   allocated block for that */
		un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
		set_block_dev_mapped (bh_result, allocated_block_nr, inode);
		bh_result->b_state |= (1UL << BH_New);
		done = 1;
	    } else {
		/* paste a hole into the indirect item */
		/* If kmalloc failed, max_to_insert becomes zero, which means
		   we only have space for one block */
		blocks_needed=max_to_insert?max_to_insert:1;
	    }
	    retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);

	    if (blocks_needed != 1)
		 kfree(un);

	    if (retval) {
		reiserfs_free_block (&th, allocated_block_nr);
		goto failure;
	    }
	    if (done) {
		inode->i_blocks += inode->i_sb->s_blocksize / 512;
	    } else {
		/* We need to record the new file size in case this function
		   is interrupted/aborted later on. And we may do this only
		   for holes. */
		inode->i_size += blocks_needed << inode->i_blkbits;
	    }
	    //mark_tail_converted (inode);
	}

	if (done == 1)
	    break;

	/* this loop could log more blocks than we had originally asked
	** for.  So, we have to allow the transaction to end if it is
	** too big or too full.  Update the inode so things are
	** consistent if we crash before the function returns
	**
	** release the path so that anybody waiting on the path before
	** ending their transaction will be able to continue.
	*/
	if (journal_transaction_should_end(&th, th.t_blocks_allocated)) {
	  restart_transaction(&th, inode, &path) ;
	}
	/* inserting indirect pointers for a hole can take a
	** long time.  reschedule if needed
	*/
	if (current->need_resched)
	    schedule() ;

	retval = search_for_position_by_key (inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
	    retval = -EIO;
	    goto failure;
	}
	if (retval == POSITION_FOUND) {
	    reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: "
			      "%K should not be found\n", &key);
	    retval = -EEXIST;
	    if (allocated_block_nr)
	        reiserfs_free_block (&th, allocated_block_nr);
	    pathrelse(&path) ;
	    goto failure;
	}
	bh = get_last_bh (&path);
	ih = get_ih (&path);
	item = get_item (&path);
	pos_in_item = path.pos_in_item;
    } while (1);


    retval = 0;
    reiserfs_check_path(&path) ;

 failure:
    if (transaction_started) {
      reiserfs_update_sd(&th, inode) ;
      journal_end(&th, inode->i_sb, jbegin_count) ;
    }
    pop_journal_writer(windex) ;
    unlock_kernel() ;
    reiserfs_check_path(&path) ;
    return retval;
}


//
// BAD: new directories have stat data of the new type while all their
// other items are of the old type. The version stored in the inode
// describes the body items, so in update_stat_data we cannot rely on
// the inode, but have to check the item version directly
//

// called by read_inode
static void init_inode (struct inode * inode, struct path * path)
{
    struct buffer_head * bh;
    struct item_head * ih;
    __u32 rdev;
    //int version = ITEM_VERSION_1;

    bh = PATH_PLAST_BUFFER (path);
    ih = PATH_PITEM_HEAD (path);

    spin_lock(&keycopy_lock);
    copy_key (INODE_PKEY (inode), &(ih->ih_key));
    spin_unlock(&keycopy_lock);
    inode->i_blksize = PAGE_SIZE;

    INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;

    if (stat_data_v1 (ih)) {
	struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
	unsigned long blocks;

	set_inode_item_key_version (inode, KEY_FORMAT_3_5);
        set_inode_sd_version (inode, STAT_DATA_V1);
	inode->i_mode  = sd_v1_mode(sd);
	inode->i_nlink = sd_v1_nlink(sd);
	inode->i_uid   = sd_v1_uid(sd);
	inode->i_gid   = sd_v1_gid(sd);
	inode->i_size  = sd_v1_size(sd);
	inode->i_atime = sd_v1_atime(sd);
	inode->i_mtime = sd_v1_mtime(sd);
	inode->i_ctime = sd_v1_ctime(sd);

	inode->i_blocks = sd_v1_blocks(sd);
	inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
	blocks = (inode->i_size + 511) >> 9;
	blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9);
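	/* Worked example (assumed sizes): with i_size == 5000 and a 4KB
	   block size, blocks == (5000 + 511) >> 9 == 10 512-byte sectors,
	   rounded up to a multiple of 4096 >> 9 == 8, giving 16 sectors,
	   i.e. two full fs blocks. */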
	if (inode->i_blocks > blocks) {
	    // there was a bug in <=3.5.23 where i_blocks could take negative
	    // values. Starting from 3.5.17 this value could even be stored in
	    // the stat data. For such files we set i_blocks based on the file
	    // size. Two notes: this can be wrong for sparse files, and the
	    // on-disk value will only be updated if the file's inode ever
	    // changes
	    inode->i_blocks = blocks;
	}

        rdev = sd_v1_rdev(sd);
	inode->u.reiserfs_i.i_first_direct_byte = sd_v1_first_direct_byte(sd);
	/* nopack is initially zero for v1 objects. For v2 objects,
	   nopack is initialised from sd_attrs */
	inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
    } else {
	// new stat data found, but the object may have old items
	// (directories and symlinks)
	struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);

	inode->i_mode   = sd_v2_mode(sd);
	inode->i_nlink  = sd_v2_nlink(sd);
	inode->i_uid    = sd_v2_uid(sd);
	inode->i_size   = sd_v2_size(sd);
	inode->i_gid    = sd_v2_gid(sd);
	inode->i_mtime  = sd_v2_mtime(sd);
	inode->i_atime  = sd_v2_atime(sd);
	inode->i_ctime  = sd_v2_ctime(sd);
	inode->i_blocks = sd_v2_blocks(sd);
        rdev            = sd_v2_rdev(sd);
	if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
	    inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
	else
            inode->i_generation = sd_v2_generation(sd);

	if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode))
	    set_inode_item_key_version (inode, KEY_FORMAT_3_5);
	else
            set_inode_item_key_version (inode, KEY_FORMAT_3_6);

        set_inode_sd_version (inode, STAT_DATA_V2);
	/* read persistent inode attributes from the sd and initialise
	   generic inode flags from them */
	inode -> u.reiserfs_i.i_attrs = sd_v2_attrs( sd );
	sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode );
    }


    pathrelse (path);
    if (S_ISREG (inode->i_mode)) {
	inode->i_op = &reiserfs_file_inode_operations;
	inode->i_fop = &reiserfs_file_operations;
	inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
    } else if (S_ISDIR (inode->i_mode)) {
	inode->i_op = &reiserfs_dir_inode_operations;
	inode->i_fop = &reiserfs_dir_operations;
    } else if (S_ISLNK (inode->i_mode)) {
	inode->i_op = &page_symlink_inode_operations;
	inode->i_mapping->a_ops = &reiserfs_address_space_operations;
    } else {
	inode->i_blocks = 0;
	init_special_inode(inode, inode->i_mode, rdev) ;
    }
}


// update new stat data with inode fields
static void inode2sd (void * sd, struct inode * inode)
{
    struct stat_data * sd_v2 = (struct stat_data *)sd;
    __u16 flags;

    set_sd_v2_mode(sd_v2, inode->i_mode );
    set_sd_v2_nlink(sd_v2, inode->i_nlink );
    set_sd_v2_uid(sd_v2, inode->i_uid );
    set_sd_v2_size(sd_v2, inode->i_size );
    set_sd_v2_gid(sd_v2, inode->i_gid );
    set_sd_v2_mtime(sd_v2, inode->i_mtime );
    set_sd_v2_atime(sd_v2, inode->i_atime );
    set_sd_v2_ctime(sd_v2, inode->i_ctime );
    set_sd_v2_blocks(sd_v2, inode->i_blocks );
    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
        set_sd_v2_rdev(sd_v2, inode->i_rdev );
    else
        set_sd_v2_generation(sd_v2, inode->i_generation);
    flags = inode -> u.reiserfs_i.i_attrs;
    i_attrs_to_sd_attrs( inode, &flags );
    set_sd_v2_attrs( sd_v2, flags );
}


// used to copy the inode's fields to the old stat data
static void inode2sd_v1 (void * sd, struct inode * inode)
{
    struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;

    set_sd_v1_mode(sd_v1, inode->i_mode );
    set_sd_v1_uid(sd_v1, inode->i_uid );
    set_sd_v1_gid(sd_v1, inode->i_gid );
    set_sd_v1_nlink(sd_v1, inode->i_nlink );
    set_sd_v1_size(sd_v1, inode->i_size );
    set_sd_v1_atime(sd_v1, inode->i_atime );
    set_sd_v1_ctime(sd_v1, inode->i_ctime );
    set_sd_v1_mtime(sd_v1, inode->i_mtime );

    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
        set_sd_v1_rdev(sd_v1, inode->i_rdev );
    else
        set_sd_v1_blocks(sd_v1, inode->i_blocks );

    // Sigh. i_first_direct_byte is back
    set_sd_v1_first_direct_byte(sd_v1, inode->u.reiserfs_i.i_first_direct_byte);
}


/* NOTE, you must prepare the buffer head before sending it here,
** and then log it after the call
*/
static void update_stat_data (struct path * path, struct inode * inode)
{
    struct buffer_head * bh;
    struct item_head * ih;

    bh = PATH_PLAST_BUFFER (path);
    ih = PATH_PITEM_HEAD (path);

    if (!is_statdata_le_ih (ih))
	reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h",
			INODE_PKEY (inode), ih);

    if (stat_data_v1 (ih)) {
	// the path points to the old stat data
	inode2sd_v1 (B_I_PITEM (bh, ih), inode);
    } else {
	inode2sd (B_I_PITEM (bh, ih), inode);
    }

    return;
}


void reiserfs_update_sd (struct reiserfs_transaction_handle *th,
			 struct inode * inode)
{
    struct cpu_key key;
    INITIALIZE_PATH(path);
    struct buffer_head *bh ;
    int fs_gen ;
    struct item_head *ih, tmp_ih ;
    int retval;

    make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant

    for(;;) {
	int pos;
	/* look for the object's stat data */
	retval = search_item (inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
	    reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: "
			      "i/o failure occurred trying to update %K stat data\n",
			      &key);
	    return;
	}
	if (retval == ITEM_NOT_FOUND) {
	    pos = PATH_LAST_POSITION (&path);
	    pathrelse(&path) ;
	    if (inode->i_nlink == 0) {
		/*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/
		return;
	    }
	    reiserfs_warning (inode->i_sb, "vs-13060: reiserfs_update_sd: "
			      "stat data of object %k (nlink == %d) not found (pos %d)\n",
			      INODE_PKEY (inode), inode->i_nlink, pos);
	    reiserfs_check_path(&path) ;
	    return;
	}

	/* sigh, prepare_for_journal might schedule.  When it schedules the
	** FS might change.  We have to detect that, and loop back to the
	** search if the stat data item has moved
	*/
	bh = get_last_bh(&path) ;
	ih = get_ih(&path) ;
	copy_item_head (&tmp_ih, ih);
	fs_gen = get_generation (inode->i_sb);
	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
	if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
	    reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
	    continue ;	/* Stat_data item has been moved after scheduling. */
	}
	break;
    }
    update_stat_data (&path, inode);
    journal_mark_dirty(th, th->t_super, bh) ;
    pathrelse (&path);
    return;
}

/* We need to clear the inode key in the private part of the inode to avoid
   races between blocking iput, knfsd and file deletion with the creation of
   safe links. */
static void reiserfs_make_bad_inode(struct inode *inode) {
    memset(INODE_PKEY(inode), 0, KEY_SIZE);
    make_bad_inode(inode);
}

void reiserfs_read_inode(struct inode *inode) {
    reiserfs_make_bad_inode(inode) ;
}


/* looks for the stat data in the tree, and fills up the stat data fields
   of the in-core inode */
void reiserfs_read_inode2 (struct inode * inode, void *p)
{
    INITIALIZE_PATH (path_to_sd);
    struct cpu_key key;
    struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ;
    unsigned long dirino;
    int retval;

    if (!p) {
	reiserfs_make_bad_inode(inode) ;
	return;
    }

    dirino = args->objectid ;

    /* set version 1; version 2 could be used too, because the stat data
       key is the same in both versions */
    key.version = KEY_FORMAT_3_5;
    key.on_disk_key.k_dir_id = dirino;
    key.on_disk_key.k_objectid = inode->i_ino;
    key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
    key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;

    /* look for the object's stat data */
    retval = search_item (inode->i_sb, &key, &path_to_sd);
    if (retval == IO_ERROR) {
	reiserfs_warning (inode->i_sb, "vs-13070: reiserfs_read_inode2: "
                    "i/o failure occurred trying to find stat data of %K\n",
                    &key);
	reiserfs_make_bad_inode(inode) ;
	return;
    }
    if (retval != ITEM_FOUND) {
	/* a stale NFS handle can trigger this without it being an error */
	pathrelse (&path_to_sd);
	reiserfs_make_bad_inode(inode) ;
	inode->i_nlink = 0;
	return;
    }

    init_inode (inode, &path_to_sd);

    /* It is possible that knfsd is trying to access the inode of a file
       that is being removed from disk by some other thread. Since we
       update the sd on unlink, all that is required is to check nlink
       here. This bug was first found by Sizif when debugging
       SquidNG/Butterfly, forgotten, and found again after Philippe
       Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

       A more logical fix would require changes in fs/inode.c:iput() to
       remove the inode from the hash-table _after_ the fs has cleaned the
       disk stuff up, and in iget() to return NULL if an I_FREEING inode is
       found in the hash-table. */
    /* Currently there is one place where it's ok to meet an inode with
       nlink==0: processing of open-unlinked and half-truncated files
       during mount (fs/reiserfs/super.c:finish_unfinished()). */
    if( ( inode -> i_nlink == 0 ) &&
	! inode -> i_sb -> u.reiserfs_sb.s_is_unlinked_ok ) {
	    reiserfs_warning( inode->i_sb, "vs-13075: reiserfs_read_inode2: "
			      "dead inode read from disk %K. "
			      "This is likely to be a race with knfsd. Ignore\n",
			      &key );
	    reiserfs_make_bad_inode( inode );
    }

    reiserfs_check_path(&path_to_sd) ; /* init_inode should have released the path */

}

/**
 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget4().
 *
 * @inode:    inode from hash table to check
 * @inode_no: inode number we are looking for
 * @opaque:   "cookie" passed to iget4(). This is &reiserfs_iget4_args.
 *
 * This function is called by iget4() to distinguish reiserfs inodes
 * having the same inode numbers. Such inodes can only exist due to some
 * error condition. One of them should be bad. Inodes with identical
 * inode numbers (objectids) are distinguished by parent directory ids.
 *
 */
static int reiserfs_find_actor( struct inode *inode,
				unsigned long inode_no, void *opaque )
{
    struct reiserfs_iget4_args *args;
    int retval;

    args = opaque;
    /* We protect against a possible parallel init_inode() on another CPU here. */
    spin_lock(&keycopy_lock);
    /* args is already in CPU order */
    if (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args -> objectid)
	retval = 1;
    else
	/* If the key does not match, let's see if we are racing with
	   another iget4() that has already progressed as far as
	   reiserfs_read_inode2() and was preempted in the call to
	   search_by_key(). The signs of that are:
	     the inode is locked;
	     the dirid and objectid are zero (not yet initialized) */
	retval = (inode->i_state & I_LOCK) &&
		 !INODE_PKEY(inode)->k_dir_id &&
		 !INODE_PKEY(inode)->k_objectid;

    spin_unlock(&keycopy_lock);
    return retval;
}

struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key)
{
    struct inode * inode;
    struct reiserfs_iget4_args args ;

    args.objectid = key->on_disk_key.k_dir_id ;
    inode = iget4 (s, key->on_disk_key.k_objectid,
		   reiserfs_find_actor, (void *)(&args));
    if (!inode)
	return ERR_PTR(-ENOMEM) ;

    if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) {
	/* either due to i/o error or a stale NFS handle */
	iput (inode);
	inode = 0;
    }
    return inode;
}

struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, __u32 *data,
				     int len, int fhtype, int parent) {
    struct cpu_key key ;
    struct inode *inode = NULL ;
    struct list_head *lp;
    struct dentry *result;

    /* fhtype happens to reflect the number of u32s encoded.
     * due to a bug in earlier code, fhtype might indicate there
     * are more u32s than were actually stored.
     * so if fhtype seems to be more than len, reduce fhtype.
     * Valid types are:
     *   2 - objectid + dir_id - legacy support
     *   3 - objectid + dir_id + generation
     *   4 - objectid + dir_id + objectid and dirid of parent - legacy
     *   5 - objectid + dir_id + generation + objectid and dirid of parent
     *   6 - as above plus generation of directory
     * 6 does not fit in NFSv2 handles
     */
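    /* Illustrative layout (assumed values): a type-5 handle for objectid
     * 1234 in directory 56, generation 7, whose parent is objectid 89 in
     * directory 10, arrives as data[] = { 1234, 56, 7, 89, 10 } with
     * fhtype == 5 -- the same ordering reiserfs_dentry_to_fh() below
     * produces.
     */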
    if (fhtype > len) {
	    if (fhtype != 6 || len != 5)
		    reiserfs_warning(sb, "nfsd/reiserfs, fhtype=%d, len=%d - odd\n",
			   fhtype, len);
	    fhtype = 5;
    }
    if (fhtype < 2 || (parent && fhtype < 4))
	goto out ;

    if (! parent) {
	    /* this works for handles from old kernels because the default
	    ** reiserfs generation number is the packing locality.
	    */
	    key.on_disk_key.k_objectid = data[0] ;
	    key.on_disk_key.k_dir_id = data[1] ;
	    inode = reiserfs_iget(sb, &key) ;
	    if (inode && !IS_ERR(inode) && (fhtype == 3 || fhtype >= 5) &&
		data[2] != inode->i_generation) {
		    iput(inode) ;
		    inode = NULL ;
	    }
    } else {
	    key.on_disk_key.k_objectid = data[fhtype>=5?3:2] ;
	    key.on_disk_key.k_dir_id = data[fhtype>=5?4:3] ;
	    inode = reiserfs_iget(sb, &key) ;
	    if (inode && !IS_ERR(inode) && fhtype == 6 &&
		data[5] != inode->i_generation) {
		    iput(inode) ;
		    inode = NULL ;
	    }
    }
out:
    if (IS_ERR(inode))
	return ERR_PTR(PTR_ERR(inode));
    if (!inode)
        return ERR_PTR(-ESTALE) ;

    /* now to find a dentry.
     * If possible, get a well-connected one
     */
    spin_lock(&dcache_lock);
    for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
	    result = list_entry(lp,struct dentry, d_alias);
	    if (! (result->d_flags & DCACHE_NFSD_DISCONNECTED)) {
		    dget_locked(result);
		    result->d_vfs_flags |= DCACHE_REFERENCED;
		    spin_unlock(&dcache_lock);
		    iput(inode);
		    return result;
	    }
    }
    spin_unlock(&dcache_lock);
    result = d_alloc_root(inode);
    if (result == NULL) {
	    iput(inode);
	    return ERR_PTR(-ENOMEM);
    }
    result->d_flags |= DCACHE_NFSD_DISCONNECTED;
    return result;

}

int reiserfs_dentry_to_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) {
    struct inode *inode = dentry->d_inode ;
    int maxlen = *lenp;

    if (maxlen < 3)
        return 255 ;

    data[0] = inode->i_ino ;
    data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
    data[2] = inode->i_generation ;
    *lenp = 3 ;
    /* no room for directory info? return what we've stored so far */
    if (maxlen < 5 || ! need_parent)
        return 3 ;

    inode = dentry->d_parent->d_inode ;
    data[3] = inode->i_ino ;
    data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
    *lenp = 5 ;
    if (maxlen < 6)
	    return 5 ;
    data[5] = inode->i_generation ;
    *lenp = 6 ;
    return 6 ;
}


/* looks for the stat data, then copies fields to it, and marks the buffer
   containing the stat data as dirty */
/* reiserfs inodes are never really dirty, since the dirty inode call
** always logs them.  This call allows the VFS inode marking routines
** to properly mark inodes for datasync and such, but only actually
** does something when called for a synchronous update.
*/
void reiserfs_write_inode (struct inode * inode, int do_sync) {
    struct reiserfs_transaction_handle th ;
    int jbegin_count = 1 ;

    if (inode->i_sb->s_flags & MS_RDONLY) {
        reiserfs_warning(inode->i_sb, "clm-6005: writing inode %lu on readonly FS\n",
	                  inode->i_ino) ;
        return ;
    }
    /* memory pressure can sometimes initiate write_inode calls with sync == 1;
    ** these cases are just when the system needs ram, not when the
    ** inode needs to reach disk for safety, and they can safely be
    ** ignored because the altered inode has already been logged.
    */
    if (do_sync && !(current->flags & PF_MEMALLOC)) {
	lock_kernel() ;
	journal_begin(&th, inode->i_sb, jbegin_count) ;
	reiserfs_update_sd (&th, inode);
	journal_end_sync(&th, inode->i_sb, jbegin_count) ;
	unlock_kernel() ;
    }
}

/* FIXME: no need any more. right? */
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode)
{
  int err = 0;

  reiserfs_update_sd (th, inode);
  return err;
}
1448 
1449 
1450 /* stat data of new object is inserted already, this inserts the item
1451    containing "." and ".." entries */
reiserfs_new_directory(struct reiserfs_transaction_handle * th,struct item_head * ih,struct path * path,const struct inode * dir)1452 static int reiserfs_new_directory (struct reiserfs_transaction_handle *th,
1453 				   struct item_head * ih, struct path * path,
1454 				   const struct inode * dir)
1455 {
1456     struct super_block * sb = th->t_super;
1457     char empty_dir [EMPTY_DIR_SIZE];
1458     char * body = empty_dir;
1459     struct cpu_key key;
1460     int retval;
1461 
1462     _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id),
1463 		   le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/);
1464 
1465     /* compose item head for new item. Directories consist of items of
1466        old type (KEY_FORMAT_3_5, formerly ITEM_VERSION_1). Do not set the
1467        key (second arg is 0), that is done by reiserfs_new_inode */
1468     if (old_format_only (sb)) {
1469 	make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1470 
1471 	make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
1472 				INODE_PKEY (dir)->k_dir_id,
1473 				INODE_PKEY (dir)->k_objectid );
1474     } else {
1475 	make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1476 
1477 	make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
1478 		   		INODE_PKEY (dir)->k_dir_id,
1479 		   		INODE_PKEY (dir)->k_objectid );
1480     }
1481 
1482     /* look for place in the tree for new item */
1483     retval = search_item (sb, &key, path);
1484     if (retval == IO_ERROR) {
1485 	reiserfs_warning (sb, "vs-13080: reiserfs_new_directory: "
1486 			  "i/o failure occurred creating new directory\n");
1487 	return -EIO;
1488     }
1489     if (retval == ITEM_FOUND) {
1490 	pathrelse (path);
1491 	reiserfs_warning (sb, "vs-13070: reiserfs_new_directory: "
1492 			  "object with this key exists (%k)\n", &(ih->ih_key));
1493 	return -EEXIST;
1494     }
1495 
1496     /* insert the empty directory item */
1497     return reiserfs_insert_item (th, path, &key, ih, body);
1498 }
1499 
1500 
1501 /* stat data of object has been inserted, this inserts the item
1502    containing the body of symlink */
1503 static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
1504 				 struct item_head * ih,
1505 				 struct path * path, const char * symname, int item_len)
1506 {
1507     struct super_block * sb = th->t_super;
1508     struct cpu_key key;
1509     int retval;
1510 
1511     _make_cpu_key (&key, KEY_FORMAT_3_5,
1512 		   le32_to_cpu (ih->ih_key.k_dir_id),
1513 		   le32_to_cpu (ih->ih_key.k_objectid),
1514 		   1, TYPE_DIRECT, 3/*key length*/);
1515 
1516     make_le_item_head (ih, 0, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/);
1517 
1518     /* look for place in the tree for new item */
1519     retval = search_item (sb, &key, path);
1520     if (retval == IO_ERROR) {
1521 	reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: "
1522 			  "i/o failure occurred creating new symlink\n");
1523 	return -EIO;
1524     }
1525     if (retval == ITEM_FOUND) {
1526 	pathrelse (path);
1527 	reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: "
1528 			  "object with this key exists (%k)\n", &(ih->ih_key));
1529 	return -EEXIST;
1530     }
1531 
1532     /* insert the item holding the symlink body */
1533     return reiserfs_insert_item (th, path, &key, ih, symname);
1534 }
1535 
1536 
1537 /* inserts the stat data into the tree, and then calls
1538    reiserfs_new_directory (to insert ".", ".." item if new object is
1539    directory) or reiserfs_new_symlink (to insert symlink body if new
1540    object is symlink) or nothing (if new object is regular file)
1541 
1542    NOTE! uid and gid must already be set in the inode.  If we return
1543    non-zero due to an error, we have to drop the quota previously allocated
1544    for the fresh inode.  This can only be done outside a transaction, so
1545    if we return non-zero, we also end the transaction.
1546 
1547    */
1548 int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
1549 				struct inode * dir, int mode,
1550 				const char * symname,
1551 				/* 0 for regular, EMPTY_DIR_SIZE for dirs,
1552 				   strlen (symname) for symlinks */
1553 				int i_size,
1554 				struct dentry *dentry,
1555 				struct inode *inode)
1556 {
1557     struct super_block * sb;
1558     INITIALIZE_PATH (path_to_key);
1559     struct cpu_key key;
1560     struct item_head ih;
1561     struct stat_data sd;
1562     int retval;
1563     int err ;
1564 
1565     if (!dir || !dir->i_nlink) {
1566 	err = -EPERM ;
1567 	goto out_bad_inode ;
1568     }
1569 
1570     sb = dir->i_sb;
1571     inode -> u.reiserfs_i.i_attrs =
1572 	    dir -> u.reiserfs_i.i_attrs & REISERFS_INHERIT_MASK;
1573     sd_attrs_to_i_attrs( inode -> u.reiserfs_i.i_attrs, inode );
1574 
1575     /* symlink cannot be immutable or append only, right? */
1576     if( S_ISLNK( inode -> i_mode ) )
1577 	    inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND );
1578 
1579     /* item head of new item */
1580     ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
1581     ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
1582     if (!ih.ih_key.k_objectid) {
1583 	err = -ENOMEM ;
1584 	goto out_bad_inode ;
1585     }
1586     if (old_format_only (sb))
1587       /* not a perfect generation count, as object ids can be reused, but this
1588       ** is as good as reiserfs can do right now.
1589       ** note that the private part of inode isn't filled in yet, we have
1590       ** to use the directory.
1591       */
1592       inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid);
1593     else
1594 #if defined( USE_INODE_GENERATION_COUNTER )
1595       inode->i_generation =
1596 	le32_to_cpu( sb -> u.reiserfs_sb.s_rs -> s_inode_generation );
1597 #else
1598       inode->i_generation = ++event;
1599 #endif
1600     /* fill stat data */
1601     inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);
1602 
1603     /* uid and gid must already be set by the caller for quota init */
1604 
1605     inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1606     inode->i_size = i_size;
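    /* i_blocks is counted in 512-byte sectors, rounded up */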
1607     inode->i_blocks = (inode->i_size + 511) >> 9;
1608     inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 1 :
1609       U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/;
1610 
1611     INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
1612 
1613     if (old_format_only (sb))
1614 	make_le_item_head (&ih, 0, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1615     else
1616 	make_le_item_head (&ih, 0, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1617 
1618     /* key to search for correct place for new stat data */
1619     _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
1620 		   le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
1621 
1622     /* find proper place for inserting of stat data */
1623     retval = search_item (sb, &key, &path_to_key);
1624     if (retval == IO_ERROR) {
1625 	err = -EIO;
1626 	goto out_bad_inode;
1627     }
1628     if (retval == ITEM_FOUND) {
1629 	pathrelse (&path_to_key);
1630 	err = -EEXIST;
1631 	goto out_bad_inode;
1632     }
1633 
1634     if (old_format_only (sb)) {
1635 	if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1636 	    pathrelse (&path_to_key);
1637 	    /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1638 	    err = -EINVAL;
1639 	    goto out_bad_inode;
1640 	}
1641 	inode2sd_v1 (&sd, inode);
1642     } else
1643 	inode2sd (&sd, inode);
1644 
1645     // these do not go to on-disk stat data
1646     inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
1647     inode->i_blksize = PAGE_SIZE;
1648     inode->i_dev = sb->s_dev;
1649 
1650     // store in in-core inode the key of stat data and version all
1651     // object items will have (directory items will have old offset
1652     // format, other new objects will consist of new items)
1653     memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
1654     if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode))
1655         set_inode_item_key_version (inode, KEY_FORMAT_3_5);
1656     else
1657         set_inode_item_key_version (inode, KEY_FORMAT_3_6);
1658     if (old_format_only (sb))
1659 	set_inode_sd_version (inode, STAT_DATA_V1);
1660     else
1661 	set_inode_sd_version (inode, STAT_DATA_V2);
1662 
1663     /* insert the stat data into the tree */
1664 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1665     if (dir->u.reiserfs_i.new_packing_locality)
1666 	th->displace_new_blocks = 1;
1667 #endif
1668     retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
1669     if (retval) {
1670 	reiserfs_check_path(&path_to_key) ;
1671 	err = retval;
1672 	goto out_bad_inode;
1673     }
1674 
1675 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1676     if (!th->displace_new_blocks)
1677 	dir->u.reiserfs_i.new_packing_locality = 0;
1678 #endif
1679     if (S_ISDIR(mode)) {
1680 	/* insert item with "." and ".." */
1681 	retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);
1682     }
1683 
1684     if (S_ISLNK(mode)) {
1685 	/* insert body of symlink */
1686 	if (!old_format_only (sb))
1687 	    i_size = ROUND_UP(i_size);
1688 	retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
1689     }
1690     if (retval) {
1691 	err = retval;
1692 	reiserfs_check_path(&path_to_key) ;
1693 	journal_end(th, th->t_super, th->t_blocks_allocated) ;
1694 	goto out_inserted_sd;
1695     }
1696 
1697     insert_inode_hash (inode);
1698     reiserfs_update_sd(th, inode) ;
1699     reiserfs_check_path(&path_to_key) ;
1700 
1701     return 0;
1702 out_bad_inode:
1703     /* Invalidate the object, nothing was inserted yet */
1704     INODE_PKEY(inode)->k_objectid = 0;
1705 
1706     /* dquot_drop must be done outside a transaction */
1707     journal_end(th, th->t_super, th->t_blocks_allocated) ;
1708     make_bad_inode(inode);
1709 
1710 out_inserted_sd:
1711     inode->i_nlink = 0;
1712     th->t_trans_id = 0 ; /* so the caller can't use this handle later */
1713     iput(inode) ;
1714     return err;
1715 }
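
/* Note for callers of reiserfs_new_inode(): on failure the transaction
** has already been ended and th->t_trans_id zeroed above, so the handle
** must not be ended or reused; just propagate the error.
*/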
1716 
1717 /*
1718 ** finds the tail page in the page cache,
1719 ** reads the last block in.
1720 **
1721 ** On success, page_result is set to a locked, pinned page, and bh_result
1722 ** is set to an up to date buffer for the last block in the file.  returns 0.
1723 **
1724 ** tail conversion is not done, so bh_result might not be valid for writing
1725 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1726 ** trying to write the block.
1727 **
1728 ** on failure, nonzero is returned, page_result and bh_result are untouched.
1729 */
1730 static int grab_tail_page(struct inode *p_s_inode,
1731 			  struct page **page_result,
1732 			  struct buffer_head **bh_result) {
1733 
1734     /* we want the page with the last byte in the file,
1735     ** not the page that will hold the next byte for appending
1736     */
1737     unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ;
1738     unsigned long pos = 0 ;
1739     unsigned long start = 0 ;
1740     unsigned long blocksize = p_s_inode->i_sb->s_blocksize ;
1741     unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ;
1742     struct buffer_head *bh ;
1743     struct buffer_head *head ;
1744     struct page * page ;
1745     int error ;
1746 
1747     /* we know that we are only called with inode->i_size > 0.
1748     ** we also know that a file tail can never be as big as a block.
1749     ** If i_size % blocksize == 0, our file is currently block aligned
1750     ** and it won't need converting or zeroing after a truncate.
1751     */
1752     if ((offset & (blocksize - 1)) == 0) {
1753         return -ENOENT ;
1754     }
1755     page = grab_cache_page(p_s_inode->i_mapping, index) ;
1756     error = -ENOMEM ;
1757     if (!page) {
1758         goto out ;
1759     }
1760     /* start within the page of the last block in the file */
1761     start = (offset / blocksize) * blocksize ;
1762 
1763     error = block_prepare_write(page, start, offset,
1764 				reiserfs_get_block_create_0) ;
1765     if (error)
1766 	goto unlock ;
1767 
1768     kunmap(page) ; /* mapped by block_prepare_write */
1769 
1770     head = page->buffers ;
1771     bh = head;
1772     do {
1773 	if (pos >= start) {
1774 	    break ;
1775 	}
1776 	bh = bh->b_this_page ;
1777 	pos += blocksize ;
1778     } while(bh != head) ;
1779 
1780     if (!buffer_uptodate(bh)) {
1781 	/* note, this should never happen, prepare_write should
1782 	** be taking care of this for us.  If the buffer isn't up to date,
1783 	** I've screwed up the code to find the buffer, or the code to
1784 	** call prepare_write
1785 	*/
1786 	reiserfs_warning(p_s_inode->i_sb, "clm-6000: error reading block %lu\n",
1787 	                  bh->b_blocknr) ;
1788 	error = -EIO ;
1789 	goto unlock ;
1790     }
1791     *bh_result = bh ;
1792     *page_result = page ;
1793 
1794 out:
1795     return error ;
1796 
1797 unlock:
1798     UnlockPage(page) ;
1799     page_cache_release(page) ;
1800     return error ;
1801 }
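
/* A minimal usage sketch (illustrative only): per the header comment,
** callers must check that the buffer really maps an unformatted node
** before writing it; reiserfs_truncate_file() below does exactly this.
*/
#if 0
if (grab_tail_page(inode, &page, &bh) == 0) {
    if (buffer_mapped(bh) && bh->b_blocknr != 0)
	; /* safe to dirty and write the block */
    UnlockPage(page);
    page_cache_release(page);
}
#endif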
1802 
1803 /*
1804 ** vfs version of truncate file.  Must NOT be called with
1805 ** a transaction already started.
1806 **
1807 ** some code taken from block_truncate_page
1808 */
1809 void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
1810     struct reiserfs_transaction_handle th ;
1811     int windex ;
1812 
1813     /* we want the offset for the first byte after the end of the file */
1814     unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ;
1815     unsigned blocksize = p_s_inode->i_sb->s_blocksize ;
1816     unsigned length ;
1817     struct page *page = NULL ;
1818     int error ;
1819     struct buffer_head *bh = NULL ;
1820 
1821     if (p_s_inode->i_size > 0) {
1822         if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
1823 	    // -ENOENT means we truncated past the end of the file,
1824 	    // and get_block_create_0 could not find a block to read in,
1825 	    // which is ok.
1826 	    if (error != -ENOENT)
1827 	        reiserfs_warning(p_s_inode->i_sb, "clm-6001: grab_tail_page failed %d\n", error);
1828 	    page = NULL ;
1829 	    bh = NULL ;
1830 	}
1831     }
1832 
1833     /* so, if page != NULL, we have a buffer head for the offset at
1834     ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
1835     ** then we have an unformatted node.  Otherwise, we have a direct item,
1836     ** and no zeroing is required on disk.  We zero after the truncate,
1837     ** because the truncate might pack the item anyway
1838     ** (it will unmap bh if it packs).
1839     */
1840     /* it is enough to reserve space in the transaction for 2 balancings:
1841        one for adding the "save" link and another for the first
1842        cut_from_item, plus 1 for update_sd */
1843     journal_begin(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
1844     reiserfs_update_inode_transaction(p_s_inode) ;
1845     windex = push_journal_writer("reiserfs_vfs_truncate_file") ;
1846     if (update_timestamps)
1847 	    /* we are doing real truncate: if the system crashes before the last
1848 	       transaction of truncating gets committed - on reboot the file
1849 	       either appears truncated properly or not truncated at all */
1850 	add_save_link (&th, p_s_inode, 1);
1851     reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
1852     pop_journal_writer(windex) ;
1853     journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
1854 
1855     if (update_timestamps)
1856 	remove_save_link (p_s_inode, 1/* truncate */);
1857 
1858     if (page) {
1859         length = offset & (blocksize - 1) ;
1860 	/* if we are not on a block boundary */
1861 	if (length) {
1862 	    length = blocksize - length ;
1863 	    memset((char *)kmap(page) + offset, 0, length) ;
1864 	    flush_dcache_page(page) ;
1865 	    kunmap(page) ;
1866 	    if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1867 	        if (!atomic_set_buffer_dirty(bh)) {
1868 			set_buffer_flushtime(bh);
1869 			refile_buffer(bh);
1870 			buffer_insert_inode_data_queue(bh, p_s_inode);
1871 			balance_dirty();
1872 		}
1873 	    }
1874 	}
1875 	UnlockPage(page) ;
1876 	page_cache_release(page) ;
1877     }
1878 
1879     return ;
1880 }
1881 
1882 static int map_block_for_writepage(struct inode *inode,
1883 			       struct buffer_head *bh_result,
1884                                unsigned long block) {
1885     struct reiserfs_transaction_handle th ;
1886     int fs_gen ;
1887     struct item_head tmp_ih ;
1888     struct item_head *ih ;
1889     struct buffer_head *bh ;
1890     __u32 *item ;
1891     struct cpu_key key ;
1892     INITIALIZE_PATH(path) ;
1893     int pos_in_item ;
1894     int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
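    /* reiserfs item keys number file bytes starting at 1, hence the +1 */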
1895     loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
1896     int retval ;
1897     int use_get_block = 0 ;
1898     int bytes_copied = 0 ;
1899     int copy_size ;
1900 
1901     kmap(bh_result->b_page) ;
1902 start_over:
1903     lock_kernel() ;
1904     journal_begin(&th, inode->i_sb, jbegin_count) ;
1905     reiserfs_update_inode_transaction(inode) ;
1906 
1907     make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
1908 
1909 research:
1910     retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
1911     if (retval != POSITION_FOUND) {
1912         use_get_block = 1;
1913 	goto out ;
1914     }
1915 
1916     bh = get_last_bh(&path) ;
1917     ih = get_ih(&path) ;
1918     item = get_item(&path) ;
1919     pos_in_item = path.pos_in_item ;
1920 
1921     /* we've found an unformatted node */
1922     if (indirect_item_found(retval, ih)) {
1923 	if (bytes_copied > 0) {
1924 	    reiserfs_warning(inode->i_sb, "clm-6002: bytes_copied %d\n", bytes_copied) ;
1925 	}
1926         if (!get_block_num(item, pos_in_item)) {
1927 	    /* crap, we are writing to a hole */
1928 	    use_get_block = 1;
1929 	    goto out ;
1930 	}
1931 	set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
1932         mark_buffer_uptodate(bh_result, 1);
1933     } else if (is_direct_le_ih(ih)) {
1934         char *p ;
1935         p = page_address(bh_result->b_page) ;
1936         p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
1937         copy_size = ih_item_len(ih) - pos_in_item;
1938 
1939 	fs_gen = get_generation(inode->i_sb) ;
1940 	copy_item_head(&tmp_ih, ih) ;
1941 	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
1942 	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
1943 	    reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
1944 	    goto research;
1945 	}
1946 
1947 	memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
1948 
1949 	journal_mark_dirty(&th, inode->i_sb, bh) ;
1950 	bytes_copied += copy_size ;
1951 	set_block_dev_mapped(bh_result, 0, inode);
1952         mark_buffer_uptodate(bh_result, 1);
1953 
1954 	/* are there still bytes left? */
1955         if (bytes_copied < bh_result->b_size &&
1956 	    (byte_offset + bytes_copied) < inode->i_size) {
1957 	    set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
1958 	    goto research ;
1959 	}
1960     } else {
1961         reiserfs_warning(inode->i_sb, "clm-6003: bad item inode %lu\n", inode->i_ino) ;
1962         retval = -EIO ;
1963 	goto out ;
1964     }
1965     retval = 0 ;
1966 
1967 out:
1968     pathrelse(&path) ;
1969     journal_end(&th, inode->i_sb, jbegin_count) ;
1970     unlock_kernel() ;
1971 
1972     /* this is where we fill in holes in the file. */
1973     if (use_get_block) {
1974 	retval = reiserfs_get_block(inode, block, bh_result,
1975 	                            GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ;
1976 	if (!retval) {
1977 	    if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
1978 	        /* get_block failed to find a mapped unformatted node. */
1979 		use_get_block = 0 ;
1980 		goto start_over ;
1981 	    }
1982 	}
1983     }
1984     kunmap(bh_result->b_page) ;
1985     return retval ;
1986 }
1987 
1988 /* helper func to get a buffer head ready for writepage to send to
1989 ** ll_rw_block
1990 */
1991 static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) {
1992     struct buffer_head *bh ;
1993     int i;
1994 
1995     /* lock them all first so the end_io handler doesn't unlock the page
1996     ** too early
1997     */
1998     for(i = 0 ; i < nr ; i++) {
1999         bh = bhp[i] ;
2000 	lock_buffer(bh) ;
2001 	set_buffer_async_io(bh) ;
2002     }
2003     for(i = 0 ; i < nr ; i++) {
2004 	/* submit_bh doesn't care if the buffer is dirty, but nobody
2005 	** later on in the call chain will be cleaning it.  So, we
2006 	** clean the buffer here, it still gets written either way.
2007 	*/
2008         bh = bhp[i] ;
2009 	clear_bit(BH_Dirty, &bh->b_state) ;
2010 	set_bit(BH_Uptodate, &bh->b_state) ;
2011 	submit_bh(WRITE, bh) ;
2012     }
2013 }
2014 
2015 static int reiserfs_write_full_page(struct page *page) {
2016     struct inode *inode = page->mapping->host ;
2017     unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
2018     unsigned last_offset = PAGE_CACHE_SIZE;
2019     int error = 0;
2020     unsigned long block ;
2021     unsigned cur_offset = 0 ;
2022     struct buffer_head *head, *bh ;
2023     int partial = 0 ;
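    /* one slot per buffer on the page; 512 bytes is the smallest possible
    ** blocksize, so this covers the worst case
    */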
2024     struct buffer_head *arr[PAGE_CACHE_SIZE/512] ;
2025     int nr = 0 ;
2026 
2027     if (!page->buffers) {
2028         block_prepare_write(page, 0, 0, NULL) ;
2029 	kunmap(page) ;
2030     }
2031     /* last page in the file, zero out any contents past the
2032     ** last byte in the file
2033     */
2034     if (page->index >= end_index) {
2035         last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
2036 	/* no file contents in this page */
2037 	if (page->index >= end_index + 1 || !last_offset) {
2038 	    error =  -EIO ;
2039 	    goto fail ;
2040 	}
2041 	memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
2042 	flush_dcache_page(page) ;
2043 	kunmap(page) ;
2044     }
2045     head = page->buffers ;
2046     bh = head ;
2047     block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ;
2048     do {
2049 	/* if this offset in the page is outside the file */
2050 	if (cur_offset >= last_offset) {
2051 	    if (!buffer_uptodate(bh))
2052 	        partial = 1 ;
2053 	} else {
2054 	    /* fast path, buffer mapped to an unformatted node */
2055 	    if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2056 		arr[nr++] = bh ;
2057 	    } else {
2058 		/* buffer not mapped yet, or points to a direct item.
2059 		** search and dirty or log
2060 		*/
2061 		if ((error = map_block_for_writepage(inode, bh, block))) {
2062 		    goto fail ;
2063 		}
2064 		/* map_block_for_writepage either found an unformatted node
2065 		** and mapped it for us, or it found a direct item
2066 		** and logged the changes.
2067 		*/
2068 		if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2069 		    arr[nr++] = bh ;
2070 		}
2071 	    }
2072 	}
2073         bh = bh->b_this_page ;
2074 	cur_offset += bh->b_size ;
2075 	block++ ;
2076     } while(bh != head) ;
2077 
2078     /* if this page only had a direct item, it is very possible for
2079     ** nr == 0 without there being any kind of error.
2080     */
2081     if (nr) {
2082         submit_bh_for_writepage(arr, nr) ;
2083 	wakeup_page_waiters(page);
2084     } else {
2085         UnlockPage(page) ;
2086     }
2087     if (!partial)
2088         SetPageUptodate(page) ;
2089 
2090     return 0 ;
2091 
2092 fail:
2093     if (nr) {
2094         submit_bh_for_writepage(arr, nr) ;
2095     } else {
2096         UnlockPage(page) ;
2097     }
2098     ClearPageUptodate(page) ;
2099     return error ;
2100 }
2101 
2102 
2103 static int reiserfs_readpage (struct file *f, struct page * page)
2104 {
2105     return block_read_full_page (page, reiserfs_get_block);
2106 }
2107 
2108 
2109 static int reiserfs_writepage (struct page * page)
2110 {
2111     struct inode *inode = page->mapping->host ;
2112     reiserfs_wait_on_write_block(inode->i_sb) ;
2113     return reiserfs_write_full_page(page) ;
2114 }
2115 
2116 
2117 int reiserfs_prepare_write(struct file *f, struct page *page,
2118 			   unsigned from, unsigned to) {
2119     struct inode *inode = page->mapping->host ;
2120     reiserfs_wait_on_write_block(inode->i_sb) ;
2121     fix_tail_page_for_writing(page) ;
2122     return block_prepare_write(page, from, to, reiserfs_get_block) ;
2123 }
2124 
2125 
2126 static int reiserfs_aop_bmap(struct address_space *as, long block) {
2127   return generic_block_bmap(as, block, reiserfs_bmap) ;
2128 }
2129 
2130 static int reiserfs_commit_write(struct file *f, struct page *page,
2131                                  unsigned from, unsigned to) {
2132     struct inode *inode = page->mapping->host ;
2133     loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2134     int ret ;
2135 
2136     reiserfs_wait_on_write_block(inode->i_sb) ;
2137 
2138     /* generic_commit_write does this for us, but does not update the
2139     ** transaction tracking stuff when the size changes.  So, we have
2140     ** to do the i_size updates here.
2141     */
2142     if (pos > inode->i_size) {
2143 	struct reiserfs_transaction_handle th ;
2144 	lock_kernel();
2145 	/* If the file has grown beyond the point where it
2146 	   can have a tail, unmark it as needing tail
2147 	   packing */
2148 	if ( (have_large_tails (inode->i_sb) && inode->i_size > block_size (inode)*4) ||
2149 	     (have_small_tails (inode->i_sb) && inode->i_size > block_size(inode)) )
2150 	    inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
2151 
2152 	journal_begin(&th, inode->i_sb, 1) ;
2153 	reiserfs_update_inode_transaction(inode) ;
2154 	inode->i_size = pos ;
2155 	reiserfs_update_sd(&th, inode) ;
2156 	journal_end(&th, inode->i_sb, 1) ;
2157 	unlock_kernel();
2158     }
2159 
2160     ret = generic_commit_write(f, page, from, to) ;
2161 
2162     /* we test for O_SYNC here so we can commit the transaction
2163     ** for any packed tails the file might have had
2164     */
2165     if (f && (f->f_flags & O_SYNC)) {
2166 	lock_kernel() ;
2167  	reiserfs_commit_for_inode(inode) ;
2168 	unlock_kernel();
2169     }
2170     return ret ;
2171 }
2172 
2173 void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode )
2174 {
2175 	if( reiserfs_attrs( inode -> i_sb ) ) {
2176 		if( sd_attrs & REISERFS_SYNC_FL )
2177 			inode -> i_flags |= S_SYNC;
2178 		else
2179 			inode -> i_flags &= ~S_SYNC;
2180 		if( sd_attrs & REISERFS_IMMUTABLE_FL )
2181 			inode -> i_flags |= S_IMMUTABLE;
2182 		else
2183 			inode -> i_flags &= ~S_IMMUTABLE;
2184 		if( sd_attrs & REISERFS_APPEND_FL )
2185 			inode -> i_flags |= S_APPEND;
2186 		else
2187 			inode -> i_flags &= ~S_APPEND;
2188 		if( sd_attrs & REISERFS_NOATIME_FL )
2189 			inode -> i_flags |= S_NOATIME;
2190 		else
2191 			inode -> i_flags &= ~S_NOATIME;
2192 		if( sd_attrs & REISERFS_NOTAIL_FL )
2193 			inode->u.reiserfs_i.i_flags |= i_nopack_mask;
2194 		else
2195 			inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
2196 	}
2197 }
2198 
2199 void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs )
2200 {
2201 	if( reiserfs_attrs( inode -> i_sb ) ) {
2202 		if( inode -> i_flags & S_IMMUTABLE )
2203 			*sd_attrs |= REISERFS_IMMUTABLE_FL;
2204 		else
2205 			*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2206 		if( inode -> i_flags & S_SYNC )
2207 			*sd_attrs |= REISERFS_SYNC_FL;
2208 		else
2209 			*sd_attrs &= ~REISERFS_SYNC_FL;
2210 		if( inode -> i_flags & S_NOATIME )
2211 			*sd_attrs |= REISERFS_NOATIME_FL;
2212 		else
2213 			*sd_attrs &= ~REISERFS_NOATIME_FL;
2214 		if( inode->u.reiserfs_i.i_flags & i_nopack_mask )
2215 			*sd_attrs |= REISERFS_NOTAIL_FL;
2216 		else
2217 			*sd_attrs &= ~REISERFS_NOTAIL_FL;
2218 	}
2219 }
2220 
2221 static int reiserfs_direct_io(int rw, struct inode *inode,
2222                               struct kiobuf *iobuf, unsigned long blocknr,
2223 			      int blocksize)
2224 {
2225     lock_kernel();
2226     reiserfs_commit_for_tail(inode);
2227     unlock_kernel();
2228     return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize,
2229                              reiserfs_get_block_direct_io) ;
2230 }
2231 
2232 struct address_space_operations reiserfs_address_space_operations = {
2233     writepage: reiserfs_writepage,
2234     readpage: reiserfs_readpage,
2235     sync_page: block_sync_page,
2236     prepare_write: reiserfs_prepare_write,
2237     commit_write: reiserfs_commit_write,
2238     bmap: reiserfs_aop_bmap,
2239     direct_IO: reiserfs_direct_io,
2240 } ;
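
/* A minimal sketch of how this table comes into play (illustrative only;
** the helper name is hypothetical): inode setup code elsewhere in
** reiserfs points each file's mapping at these operations, after which
** the VFS invokes the methods above for all page I/O on the file.
*/
#if 0
static void example_set_aops(struct inode *inode)
{
    inode->i_mapping->a_ops = &reiserfs_address_space_operations;
}
#endif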
2241