/*
 * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/reiserfs_fs.h>
#include <linux/locks.h>
#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>

/* args for the create parameter of reiserfs_get_block */
#define GET_BLOCK_NO_CREATE 0    /* don't create new blocks or convert tails */
#define GET_BLOCK_CREATE 1       /* add anything you need to find block */
#define GET_BLOCK_NO_HOLE 2      /* return -ENOENT for file holes */
#define GET_BLOCK_READ_DIRECT 4  /* read the tail if indirect item not found */
#define GET_BLOCK_NO_ISEM 8      /* i_sem is not held, don't preallocate */
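/* These are bit values; callers OR them into the 'create' argument of
** reiserfs_get_block.  For example, reiserfs_get_block_create_0() below
** passes GET_BLOCK_NO_HOLE, and reiserfs_get_block() itself tests
** (create & GET_BLOCK_CREATE) rather than comparing for equality.
*/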

static int reiserfs_get_block (struct inode * inode, long block,
                               struct buffer_head * bh_result, int create);

/* This spinlock guards inode pkey in private part of inode
   against race between find_actor() vs reiserfs_read_inode2 */
static spinlock_t keycopy_lock = SPIN_LOCK_UNLOCKED;

void reiserfs_delete_inode (struct inode * inode)
{
    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2;
    int windex ;
    struct reiserfs_transaction_handle th ;


    lock_kernel() ;

    /* The objectid == 0 case happens when we abort creating a new inode
    ** for some reason, like lack of space. */
    if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
        down (&inode->i_sem);

        journal_begin(&th, inode->i_sb, jbegin_count) ;
        reiserfs_update_inode_transaction(inode) ;
        windex = push_journal_writer("delete_inode") ;

        reiserfs_delete_object (&th, inode);
        pop_journal_writer(windex) ;

        journal_end(&th, inode->i_sb, jbegin_count) ;

        up (&inode->i_sem);

        /* all items of file are deleted, so we can remove "save" link */
        remove_save_link (inode, 0/* not truncate */);
    } else {
        /* no object items are in the tree */
        ;
    }
    clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
    inode->i_blocks = 0;
    unlock_kernel() ;
}

static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid,
                           loff_t offset, int type, int length )
{
    key->version = version;

    key->on_disk_key.k_dir_id = dirid;
    key->on_disk_key.k_objectid = objectid;
    set_cpu_key_k_offset (key, offset);
    set_cpu_key_k_type (key, type);
    key->key_length = length;
}


/* take the base of the key (dirid, objectid) and the key version from the
   inode, then set the offset and type of the key */
void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset,
                   int type, int length )
{
    _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
                   le32_to_cpu (INODE_PKEY (inode)->k_objectid),
                   offset, type, length);
}
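
/* Example (used this way in _get_block_create_0 and reiserfs_get_block
** below): the key for the 'block'-th logical block of a file is built as
**
**     make_cpu_key (&key, inode, (loff_t)block * blocksize + 1, TYPE_ANY, 3);
**
** Key offsets are 1-based byte offsets into the file, hence the "+ 1".
*/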


//
// when key is 0, do not set version and short key
//
inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key,
                               int version,
                               loff_t offset, int type, int length,
                               int entry_count/*or ih_free_space*/)
{
    if (key) {
        ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id);
        ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid);
    }
    put_ih_version( ih, version );
    set_le_ih_k_offset (ih, offset);
    set_le_ih_k_type (ih, type);
    put_ih_item_len( ih, length );
    /* set_ih_free_space (ih, 0);*/
    // for directory items this is the entry count; for direct and stat
    // data items it is 0xffff, and for indirect items it is 0
    put_ih_entry_count( ih, entry_count );
}

static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
    struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;

    buffer_insert_list(bh, &j->j_dirty_buffers) ;
}

//
// FIXME: we might cache recently accessed indirect item

// Ugh.  Not too eager for that....
// I cut the code until such time as I see a convincing argument (benchmark).
// I don't want a bloated inode struct..., and I don't like code complexity....

/* cutting the code is fine, since it really isn't in use yet and is easy
** to add back in.  But, Vladimir has a really good idea here.  Think
** about what happens for reading a file.  For each page,
** The VFS layer calls reiserfs_readpage, who searches the tree to find
** an indirect item.  This indirect item has X number of pointers, where
** X is a big number if we've done the block allocation right.  But,
** we only use one or two of these pointers during each call to readpage,
** needlessly researching again later on.
**
** The size of the cache could be dynamic based on the size of the file.
**
** I'd also like to see us cache the location of the stat data item, since
** we are needlessly researching for that frequently.
**
** --chris
*/

/* If this page has a file tail in it, and
** it was read in by get_block_create_0, the page data is valid,
** but the tail is still sitting in a direct item, and we can't write to
** it.  So, look through this page, and check all the mapped buffers
** to make sure they have valid block numbers.  Any that don't must be
** unmapped, so that block_prepare_write will correctly call
** reiserfs_get_block to convert the tail into an unformatted node
*/
static inline void fix_tail_page_for_writing(struct page *page) {
    struct buffer_head *head, *next, *bh ;

    if (page && page->buffers) {
        head = page->buffers ;
        bh = head ;
        do {
            next = bh->b_this_page ;
            if (buffer_mapped(bh) && bh->b_blocknr == 0) {
                reiserfs_unmap_buffer(bh) ;
            }
            bh = next ;
        } while (bh != head) ;
    }
}

/* reiserfs_get_block does not need to allocate a block only if it has been
   done already or a non-hole position has been found in the indirect item */
static inline int allocation_needed (int retval, b_blocknr_t allocated,
                                     struct item_head * ih,
                                     __u32 * item, int pos_in_item)
{
    if (allocated)
        return 0;
    if (retval == POSITION_FOUND && is_indirect_le_ih (ih) &&
        get_block_num(item, pos_in_item))
        return 0;
    return 1;
}

static inline int indirect_item_found (int retval, struct item_head * ih)
{
    return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
}


static inline void set_block_dev_mapped (struct buffer_head * bh,
                                         b_blocknr_t block, struct inode * inode)
{
    bh->b_dev = inode->i_dev;
    bh->b_blocknr = block;
    bh->b_state |= (1UL << BH_Mapped);
}


//
// files which were created in the old (3.5) format can not be larger
// than 2 GB
//
static int file_capable (struct inode * inode, long block)
{
    if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is a new-format file
        block < (1 << (31 - inode->i_sb->s_blocksize_bits)))    // old file, but 'block' is inside of 2 GB
        return 1;

    return 0;
}
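
/* The limit check above says: the byte offset of 'block' must stay below
** 2^31.  Worked example (not from the original source): with a 4 KB
** blocksize, s_blocksize_bits == 12, so the last addressable block of an
** old-format file is (1 << (31 - 12)) - 1 = 524287, and 524288 blocks of
** 4096 bytes is exactly 2 GB.
*/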

/*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
                                    struct inode *inode, struct path *path) {
    struct super_block *s = th->t_super ;
    int len = th->t_blocks_allocated ;

    pathrelse(path) ;
    reiserfs_update_sd(th, inode) ;
    journal_end(th, s, len) ;
    journal_begin(th, s, len) ;
    reiserfs_update_inode_transaction(inode) ;
}

// Called by reiserfs_get_block when create == 0.  Returns the block number
// for the 'block'-th logical block of the file.  When it hits a direct item
// it either returns 0 (when called from bmap) or reads the direct item into
// a piece of the page (bh_result).

static int _get_block_create_0 (struct inode * inode, long block,
                                struct buffer_head * bh_result,
                                int args)
{
    INITIALIZE_PATH (path);
    struct cpu_key key;
    struct buffer_head * bh;
    struct item_head * ih, tmp_ih;
    int fs_gen ;
    int blocknr;
    char * p = NULL;
    int chars;
    int ret ;
    int done = 0 ;
    unsigned long offset ;

    // prepare the key to look for the 'block'-th block of file
    make_cpu_key (&key, inode,
                  (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);

research:
    if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
        pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
        // We do not return -ENOENT if there is a hole but the page is uptodate, because
        // that means there is some mmapped data associated with it that is yet to be
        // written to disk.
        if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
            return -ENOENT ;
        }
        return 0 ;
    }

    //
    bh = get_last_bh (&path);
    ih = get_ih (&path);
    if (is_indirect_le_ih (ih)) {
        __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);

        /* FIXME: here we could cache the indirect item or part of it in
           the inode to avoid search_by_key in case of subsequent
           access to the file */
        blocknr = get_block_num(ind_item, path.pos_in_item) ;
        ret = 0 ;
        if (blocknr) {
            bh_result->b_dev = inode->i_dev;
            bh_result->b_blocknr = blocknr;
            bh_result->b_state |= (1UL << BH_Mapped);
        } else
            // We do not return -ENOENT if there is a hole but the page is uptodate, because
            // that means there is some mmapped data associated with it that is yet to be
            // written to disk.
            if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
                ret = -ENOENT ;
            }

        pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
        return ret ;
    }

    // requested data are in direct item(s)
    if (!(args & GET_BLOCK_READ_DIRECT)) {
        // we are called by bmap. FIXME: we can not map a block of the file
        // when it is stored in direct item(s)
        pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
        return -ENOENT;
    }

    /* if we've got a direct item, and the buffer was uptodate,
    ** we don't want to pull data off disk again.  skip to the
    ** end, where we map the buffer and return
    */
    if (buffer_uptodate(bh_result)) {
        goto finished ;
    } else
        /*
        ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
        ** pages without any buffers.  If the page is up to date, we don't want
        ** to read old data off disk.  Set the up to date bit on the buffer instead
        ** and jump to the end
        */
        if (Page_Uptodate(bh_result->b_page)) {
            mark_buffer_uptodate(bh_result, 1);
            goto finished ;
        }

    // read file tail into part of page
    offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
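    /* cpu_key_k_offset() is a 1-based byte offset into the file, so subtract
    ** one before masking to get the zero-based offset of the tail within
    ** the page.
    */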
    fs_gen = get_generation(inode->i_sb) ;
    copy_item_head (&tmp_ih, ih);

    /* we only want to kmap if we are reading the tail into the page.
    ** this is not the common case, so we don't kmap until we are
    ** sure we need to.  But, this means the item might move if
    ** kmap schedules
    */
    if (!p) {
        p = (char *)kmap(bh_result->b_page) ;
        if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
            goto research;
        }
    }
    p += offset ;
    memset (p, 0, inode->i_sb->s_blocksize);
    do {
        if (!is_direct_le_ih (ih)) {
            BUG ();
        }
        /* make sure we don't read more bytes than actually exist in
        ** the file.  This can happen in odd cases where i_size isn't
        ** correct, and when direct item padding results in a few
        ** extra bytes at the end of the direct item
        */
        if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
            break ;
        if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
            chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
            done = 1 ;
        } else {
            chars = ih_item_len(ih) - path.pos_in_item;
        }
        memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);

        if (done)
            break ;

        p += chars;

        if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
            // we are done if the direct item we just read is not the last
            // item of the node.  FIXME: we could try to check the right
            // delimiting key to see whether the direct item continues in
            // the right neighbor, or rely on i_size
            break;

        // update key to look for the next piece
        set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
        if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
            // we already read something from the tail, so return that even
            // if we got IO_ERROR now
            break;
        bh = get_last_bh (&path);
        ih = get_ih (&path);
    } while (1);

    flush_dcache_page(bh_result->b_page) ;
    kunmap(bh_result->b_page) ;

finished:
    pathrelse (&path);
    bh_result->b_blocknr = 0 ;
    bh_result->b_dev = inode->i_dev;
    mark_buffer_uptodate (bh_result, 1);
    bh_result->b_state |= (1UL << BH_Mapped);
    return 0;
}


// This is called to create the file's block map, so _get_block_create_0
// will not read a direct item.
int reiserfs_bmap (struct inode * inode, long block,
                   struct buffer_head * bh_result, int create)
{
    if (!file_capable (inode, block))
        return -EFBIG;

    lock_kernel() ;
    /* do not read the direct item */
    _get_block_create_0 (inode, block, bh_result, 0) ;
    unlock_kernel() ;
    return 0;
}

/* special version of get_block that is only used by grab_tail_page right
** now.  It is sent to block_prepare_write, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer.  block_prepare_write expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows block_prepare_write to be used for reading a single block
** in a page, while not producing a valid buffer for holes or for blocks past
** the end of the file.  This turns out to be exactly what we need for
** reading tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1.  If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
static int reiserfs_get_block_create_0 (struct inode * inode, long block,
                        struct buffer_head * bh_result, int create) {
    return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
}

static int reiserfs_get_block_direct_io (struct inode * inode, long block,
                        struct buffer_head * bh_result, int create) {
    int ret ;

    bh_result->b_page = NULL;
    ret = reiserfs_get_block(inode, block, bh_result, create) ;

    /* don't allow direct io onto tail pages */
    if (ret == 0 && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
        /* make sure future calls to the direct io funcs for this offset
        ** in the file fail by unmapping the buffer
        */
        reiserfs_unmap_buffer(bh_result);
        ret = -EINVAL ;
    }
    /* Possible unpacked tail. Flush the data before pages have
       disappeared */
    if (inode->u.reiserfs_i.i_flags & i_pack_on_close_mask) {
        lock_kernel();
        reiserfs_commit_for_inode(inode);
        inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
        unlock_kernel();
    }
    return ret ;
}


/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item.
** bh_result is the buffer head for the hole.
** tail_offset is the offset of the start of the tail in the file.
**
** This calls prepare_write, which will start a new transaction.
** You should not be in a transaction, or have any paths held, when you
** call this.
*/
static int convert_tail_for_hole(struct inode *inode,
                                 struct buffer_head *bh_result,
                                 loff_t tail_offset) {
    unsigned long index ;
    unsigned long tail_end ;
    unsigned long tail_start ;
    struct page * tail_page ;
    struct page * hole_page = bh_result->b_page ;
    int retval = 0 ;

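    /* tail_offset is a 1-based byte offset, so the first byte of a block
    ** has (tail_offset & (blocksize - 1)) == 1.  The check below rejects a
    ** tail that does not start on a block boundary.
    */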
    if ((tail_offset & (bh_result->b_size - 1)) != 1)
        return -EIO ;

    /* always try to read until the end of the block */
    tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
    tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;

    index = tail_offset >> PAGE_CACHE_SHIFT ;
    if ( !hole_page || index != hole_page->index) {
        tail_page = grab_cache_page(inode->i_mapping, index) ;
        retval = -ENOMEM;
        if (!tail_page) {
            goto out ;
        }
    } else {
        tail_page = hole_page ;
    }

    /* we don't have to make sure the conversion did not happen while
    ** we were locking the page because anyone that could convert
    ** must first take i_sem.
    **
    ** We must fix the tail page for writing because it might have buffers
    ** that are mapped, but have a block number of 0.  This indicates tail
    ** data that has been read directly into the page, and block_prepare_write
    ** won't trigger a get_block in this case.
    */
    fix_tail_page_for_writing(tail_page) ;
    retval = block_prepare_write(tail_page, tail_start, tail_end,
                                 reiserfs_get_block) ;
    if (retval)
        goto unlock ;

    /* tail conversion might change the data in the page */
    flush_dcache_page(tail_page) ;

    retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ;

unlock:
    if (tail_page != hole_page) {
        UnlockPage(tail_page) ;
        page_cache_release(tail_page) ;
    }
out:
    return retval ;
}

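/* Pick a block allocator for reiserfs_get_block.  When the kernel was built
** with REISERFS_PREALLOCATE and the caller holds i_sem (GET_BLOCK_NO_ISEM
** not set in flags), the preallocating variant is used; otherwise a single
** unformatted node block is allocated.
*/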
static inline int _allocate_block(struct reiserfs_transaction_handle *th,
                                  long block,
                                  struct inode *inode,
                                  b_blocknr_t *allocated_block_nr,
                                  struct path * path,
                                  int flags) {

#ifdef REISERFS_PREALLOCATE
    if (!(flags & GET_BLOCK_NO_ISEM)) {
        return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block);
    }
#endif
    return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block);
}

static int reiserfs_get_block (struct inode * inode, long block,
                               struct buffer_head * bh_result, int create)
{
    int repeat, retval;
    b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long
    INITIALIZE_PATH(path);
    int pos_in_item;
    struct cpu_key key;
    struct buffer_head * bh, * unbh = 0;
    struct item_head * ih, tmp_ih;
    __u32 * item;
    int done;
    int fs_gen;
    int windex ;
    struct reiserfs_transaction_handle th ;
    /* space reserved in transaction batch:
       . 3 balancings in direct->indirect conversion
       . 1 block involved into reiserfs_update_sd()
       XXX in practically impossible worst case direct2indirect()
       can incur (much) more than 3 balancings. */
    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
    int version;
    int transaction_started = 0 ;
    loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;

    /* bad.... */
    lock_kernel() ;
    th.t_trans_id = 0 ;
    version = get_inode_item_key_version (inode);

    if (block < 0) {
        unlock_kernel();
        return -EIO;
    }

    if (!file_capable (inode, block)) {
        unlock_kernel() ;
        return -EFBIG;
    }

    /* if !create, we aren't changing the FS, so we don't need to
    ** log anything, so we don't need to start a transaction
    */
    if (!(create & GET_BLOCK_CREATE)) {
        int ret ;
        /* find number of block-th logical block of the file */
        ret = _get_block_create_0 (inode, block, bh_result,
                                   create | GET_BLOCK_READ_DIRECT) ;
        unlock_kernel() ;
        return ret;
    }

    /* If the file is small enough that it might have a tail, and tails are
    ** enabled, we should mark it as possibly needing tail packing on close
    */
    if ( (have_large_tails (inode->i_sb) && inode->i_size < block_size (inode)*4) ||
         (have_small_tails (inode->i_sb) && inode->i_size < block_size(inode)) )
        inode->u.reiserfs_i.i_flags |= i_pack_on_close_mask;

    windex = push_journal_writer("reiserfs_get_block") ;

    /* set the key of the first byte in the 'block'-th block of file */
    make_cpu_key (&key, inode, new_offset,
                  TYPE_ANY, 3/*key length*/);
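    /* If the last byte of the requested block lies past i_size, the file is
    ** being extended, so a transaction will certainly be needed; start it
    ** now rather than discovering that after the tree search (the
    ** !transaction_started paths below must pathrelse and restart the
    ** search when they have to open one late).
    */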
    if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
        journal_begin(&th, inode->i_sb, jbegin_count) ;
        reiserfs_update_inode_transaction(inode) ;
        transaction_started = 1 ;
    }
research:

    retval = search_for_position_by_key (inode->i_sb, &key, &path);
    if (retval == IO_ERROR) {
        retval = -EIO;
        goto failure;
    }

    bh = get_last_bh (&path);
    ih = get_ih (&path);
    item = get_item (&path);
    pos_in_item = path.pos_in_item;

    fs_gen = get_generation (inode->i_sb);
    copy_item_head (&tmp_ih, ih);

    if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
        /* we have to allocate block for the unformatted node */
        if (!transaction_started) {
            pathrelse(&path) ;
            journal_begin(&th, inode->i_sb, jbegin_count) ;
            reiserfs_update_inode_transaction(inode) ;
            transaction_started = 1 ;
            goto research ;
        }

        repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create);

        if (repeat == NO_DISK_SPACE) {
            /* restart the transaction to give the journal a chance to free
            ** some blocks.  releases the path, so we have to go back to
            ** research if we succeed on the second try
            */
            restart_transaction(&th, inode, &path) ;
            repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create);

            if (repeat != NO_DISK_SPACE) {
                goto research ;
            }
            retval = -ENOSPC;
            goto failure;
        }

        if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
            goto research;
        }
    }

    if (indirect_item_found (retval, ih)) {
        b_blocknr_t unfm_ptr;
        /* 'block'-th block is in the file already (there is a
           corresponding cell in some indirect item). But it may be a
           zero unformatted node pointer (hole) */
        unfm_ptr = get_block_num (item, pos_in_item);
        if (unfm_ptr == 0) {
            /* use allocated block to plug the hole */
            reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
            if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
                reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
                goto research;
            }
            bh_result->b_state |= (1UL << BH_New);
            put_block_num(item, pos_in_item, allocated_block_nr) ;
            unfm_ptr = allocated_block_nr;
            journal_mark_dirty (&th, inode->i_sb, bh);
            inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
            reiserfs_update_sd(&th, inode) ;
        }
        set_block_dev_mapped(bh_result, unfm_ptr, inode);
        pathrelse (&path);
        pop_journal_writer(windex) ;
        if (transaction_started)
            journal_end(&th, inode->i_sb, jbegin_count) ;

        unlock_kernel() ;

        /* the item was found, so new blocks were not added to the file.
        ** there is no need to make sure the inode is updated with this
        ** transaction
        */
        return 0;
    }

    if (!transaction_started) {
        /* if we don't pathrelse, we could vs-3050 on the buffer if
        ** someone is waiting for it (they can't finish until the buffer
        ** is released, and we can't start a new transaction until they
        ** finish)
        */
        pathrelse(&path) ;
        journal_begin(&th, inode->i_sb, jbegin_count) ;
        reiserfs_update_inode_transaction(inode) ;
        transaction_started = 1 ;
        goto research;
    }

    /* desired position is not found or is in the direct item.  We have
       to append the file with holes up to the 'block'-th block, converting
       direct items to an indirect one if necessary */
    done = 0;
    do {
        if (is_statdata_le_ih (ih)) {
            __u32 unp = 0;
            struct cpu_key tmp_key;

            /* indirect item has to be inserted */
            make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT,
                               UNFM_P_SIZE, 0/* free_space */);

            if (cpu_key_k_offset (&key) == 1) {
                /* we are going to add 'block'-th block to the file. Use
                   allocated block for that */
                unp = cpu_to_le32 (allocated_block_nr);
                set_block_dev_mapped (bh_result, allocated_block_nr, inode);
                bh_result->b_state |= (1UL << BH_New);
                done = 1;
            }
            tmp_key = key; // ;)
            set_cpu_key_k_offset (&tmp_key, 1);
            PATH_LAST_POSITION(&path) ++;

            retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
            if (retval) {
                reiserfs_free_block (&th, allocated_block_nr);
                goto failure; // retval == -ENOSPC or -EIO or -EEXIST
            }
            if (unp)
                inode->i_blocks += inode->i_sb->s_blocksize / 512;
            //mark_tail_converted (inode);
        } else if (is_direct_le_ih (ih)) {
            /* direct item has to be converted */
            loff_t tail_offset;

            tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
            if (tail_offset == cpu_key_k_offset (&key)) {
                /* direct item we just found fits into block we have
                   to map. Convert it into unformatted node: use
                   bh_result for the conversion */
                set_block_dev_mapped (bh_result, allocated_block_nr, inode);
                unbh = bh_result;
                done = 1;
            } else {
                /* we have to pad the file tail stored in direct item(s)
                   up to block size and convert it to an unformatted
                   node. FIXME: this should also get into page cache */

                pathrelse(&path) ;
                journal_end(&th, inode->i_sb, jbegin_count) ;
                transaction_started = 0 ;

                retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
                if (retval) {
                    if ( retval != -ENOSPC )
                        reiserfs_warning(inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
                    if (allocated_block_nr) {
                        /* the bitmap, the super, and the stat data == 3 */
                        journal_begin(&th, inode->i_sb, 3) ;
                        reiserfs_free_block (&th, allocated_block_nr);
                        transaction_started = 1 ;
                    }
                    goto failure ;
                }
                goto research ;
            }
            retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
            if (retval) {
                reiserfs_unmap_buffer(unbh);
                reiserfs_free_block (&th, allocated_block_nr);
                goto failure;
            }
            /* it is important the mark_buffer_uptodate is done after
            ** the direct2indirect.  The buffer might contain valid
            ** data newer than the data on disk (read by readpage, changed,
            ** and then sent here by writepage).  direct2indirect needs
            ** to know if unbh was already up to date, so it can decide
            ** if the data in unbh needs to be replaced with data from
            ** the disk
            */
            mark_buffer_uptodate (unbh, 1);

            /* unbh->b_page == NULL in case of DIRECT_IO request, this means
               buffer will disappear shortly, so it should not be added to
               any of our lists.
            */
            if ( unbh->b_page ) {
                /* we've converted the tail, so we must
                ** flush unbh before the transaction commits
                */
                add_to_flushlist(inode, unbh) ;

                /* mark it dirty now to prevent commit_write from adding
                ** this buffer to the inode's dirty buffer list
                */
                __mark_buffer_dirty(unbh) ;
            }

            //inode->i_blocks += inode->i_sb->s_blocksize / 512;
            //mark_tail_converted (inode);
        } else {
            /* append indirect item with holes if needed, when appending
               pointer to 'block'-th block use block, which is already
               allocated */
            struct cpu_key tmp_key;
            unp_t unf_single=0; // We use this in case we need to allocate
                                // only one block, which is a fast path
            unp_t *un;
            __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
            __u64 blocks_needed;

            RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
                    "vs-804: invalid position for append");
            /* indirect item has to be appended, set up key of that position */
            make_cpu_key (&tmp_key, inode,
                          le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
                          //pos_in_item * inode->i_sb->s_blocksize,
                          TYPE_INDIRECT, 3);// key type is unimportant

            blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
            RFALSE( blocks_needed < 0, "green-805: invalid offset");

            if ( blocks_needed == 1 ) {
                un = &unf_single;
            } else {
                un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
                            GFP_ATOMIC); // We need to avoid scheduling.
                if ( !un) {
                    un = &unf_single;
                    blocks_needed = 1;
                    max_to_insert = 0;
                } else
                    memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
            }
            if ( blocks_needed <= max_to_insert) {
                /* we are going to add target block to the file. Use allocated
                   block for that */
                un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
                set_block_dev_mapped (bh_result, allocated_block_nr, inode);
                bh_result->b_state |= (1UL << BH_New);
                done = 1;
            } else {
                /* paste hole to the indirect item */
                /* If kmalloc failed, max_to_insert becomes zero and it means we
                   only have space for one block */
                blocks_needed=max_to_insert?max_to_insert:1;
            }
            retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);

            if (blocks_needed != 1)
                kfree(un);

            if (retval) {
                reiserfs_free_block (&th, allocated_block_nr);
                goto failure;
            }
            if (done) {
                inode->i_blocks += inode->i_sb->s_blocksize / 512;
            } else {
                /* We need to mark the new file size in case this function is
                   interrupted/aborted later on.  And we may do this only for
                   holes. */
                inode->i_size += blocks_needed << inode->i_blkbits;
            }
            //mark_tail_converted (inode);
        }

        if (done == 1)
            break;

        /* this loop could log more blocks than we had originally asked
        ** for.  So, we have to allow the transaction to end if it is
        ** too big or too full.  Update the inode so things are
        ** consistent if we crash before the function returns
        **
        ** release the path so that anybody waiting on the path before
        ** ending their transaction will be able to continue.
        */
        if (journal_transaction_should_end(&th, th.t_blocks_allocated)) {
            restart_transaction(&th, inode, &path) ;
        }
        /* inserting indirect pointers for a hole can take a
        ** long time.  reschedule if needed
        */
        if (current->need_resched)
            schedule() ;

        retval = search_for_position_by_key (inode->i_sb, &key, &path);
        if (retval == IO_ERROR) {
            retval = -EIO;
            goto failure;
        }
        if (retval == POSITION_FOUND) {
            reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: "
                              "%K should not be found\n", &key);
            retval = -EEXIST;
            if (allocated_block_nr)
                reiserfs_free_block (&th, allocated_block_nr);
            pathrelse(&path) ;
            goto failure;
        }
        bh = get_last_bh (&path);
        ih = get_ih (&path);
        item = get_item (&path);
        pos_in_item = path.pos_in_item;
    } while (1);


    retval = 0;
    reiserfs_check_path(&path) ;

failure:
    if (transaction_started) {
        reiserfs_update_sd(&th, inode) ;
        journal_end(&th, inode->i_sb, jbegin_count) ;
    }
    pop_journal_writer(windex) ;
    unlock_kernel() ;
    reiserfs_check_path(&path) ;
    return retval;
}


//
// BAD: new directories have stat data of the new type and all other items
// of the old type.  The version stored in the inode describes the body
// items, so in update_stat_data we can not rely on the inode, but have to
// check the item version directly
//

// called by read_inode
static void init_inode (struct inode * inode, struct path * path)
{
    struct buffer_head * bh;
    struct item_head * ih;
    __u32 rdev;
    //int version = ITEM_VERSION_1;

    bh = PATH_PLAST_BUFFER (path);
    ih = PATH_PITEM_HEAD (path);

    spin_lock(&keycopy_lock);
    copy_key (INODE_PKEY (inode), &(ih->ih_key));
    spin_unlock(&keycopy_lock);
    inode->i_blksize = PAGE_SIZE;

    INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;

    if (stat_data_v1 (ih)) {
        struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
        unsigned long blocks;

        set_inode_item_key_version (inode, KEY_FORMAT_3_5);
        set_inode_sd_version (inode, STAT_DATA_V1);
        inode->i_mode = sd_v1_mode(sd);
        inode->i_nlink = sd_v1_nlink(sd);
        inode->i_uid = sd_v1_uid(sd);
        inode->i_gid = sd_v1_gid(sd);
        inode->i_size = sd_v1_size(sd);
        inode->i_atime = sd_v1_atime(sd);
        inode->i_mtime = sd_v1_mtime(sd);
        inode->i_ctime = sd_v1_ctime(sd);

        inode->i_blocks = sd_v1_blocks(sd);
        inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
        blocks = (inode->i_size + 511) >> 9;
        blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9);
        if (inode->i_blocks > blocks) {
            // there was a bug in <=3.5.23 when i_blocks could take negative
            // values. Starting from 3.5.17 this value could even be stored
            // in stat data. For such files we set i_blocks based on file
            // size. Just 2 notes: this can be wrong for sparse files, and
            // the on-disk value will only be updated if the file's inode
            // ever changes
            inode->i_blocks = blocks;
        }

        rdev = sd_v1_rdev(sd);
        inode->u.reiserfs_i.i_first_direct_byte = sd_v1_first_direct_byte(sd);
        /* nopack is initially zero for v1 objects. For v2 objects,
           nopack is initialised from sd_attrs */
        inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
    } else {
        // new stat data found, but object may have old items
        // (directories and symlinks)
        struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);

        inode->i_mode = sd_v2_mode(sd);
        inode->i_nlink = sd_v2_nlink(sd);
        inode->i_uid = sd_v2_uid(sd);
        inode->i_size = sd_v2_size(sd);
        inode->i_gid = sd_v2_gid(sd);
        inode->i_mtime = sd_v2_mtime(sd);
        inode->i_atime = sd_v2_atime(sd);
        inode->i_ctime = sd_v2_ctime(sd);
        inode->i_blocks = sd_v2_blocks(sd);
        rdev = sd_v2_rdev(sd);
        if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
            inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
        else
            inode->i_generation = sd_v2_generation(sd);

        if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode))
            set_inode_item_key_version (inode, KEY_FORMAT_3_5);
        else
            set_inode_item_key_version (inode, KEY_FORMAT_3_6);

        set_inode_sd_version (inode, STAT_DATA_V2);
        /* read persistent inode attributes from sd and initialise
           generic inode flags from them */
        inode -> u.reiserfs_i.i_attrs = sd_v2_attrs( sd );
        sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode );
    }


    pathrelse (path);
    if (S_ISREG (inode->i_mode)) {
        inode->i_op = &reiserfs_file_inode_operations;
        inode->i_fop = &reiserfs_file_operations;
        inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
    } else if (S_ISDIR (inode->i_mode)) {
        inode->i_op = &reiserfs_dir_inode_operations;
        inode->i_fop = &reiserfs_dir_operations;
    } else if (S_ISLNK (inode->i_mode)) {
        inode->i_op = &page_symlink_inode_operations;
        inode->i_mapping->a_ops = &reiserfs_address_space_operations;
    } else {
        inode->i_blocks = 0;
        init_special_inode(inode, inode->i_mode, rdev) ;
    }
}


// update new stat data with inode fields
static void inode2sd (void * sd, struct inode * inode)
{
    struct stat_data * sd_v2 = (struct stat_data *)sd;
    __u16 flags;

    set_sd_v2_mode(sd_v2, inode->i_mode );
    set_sd_v2_nlink(sd_v2, inode->i_nlink );
    set_sd_v2_uid(sd_v2, inode->i_uid );
    set_sd_v2_size(sd_v2, inode->i_size );
    set_sd_v2_gid(sd_v2, inode->i_gid );
    set_sd_v2_mtime(sd_v2, inode->i_mtime );
    set_sd_v2_atime(sd_v2, inode->i_atime );
    set_sd_v2_ctime(sd_v2, inode->i_ctime );
    set_sd_v2_blocks(sd_v2, inode->i_blocks );
    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
        set_sd_v2_rdev(sd_v2, inode->i_rdev );
    else
        set_sd_v2_generation(sd_v2, inode->i_generation);
    flags = inode -> u.reiserfs_i.i_attrs;
    i_attrs_to_sd_attrs( inode, &flags );
    set_sd_v2_attrs( sd_v2, flags );
}


// used to copy inode's fields to old stat data
static void inode2sd_v1 (void * sd, struct inode * inode)
{
    struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;

    set_sd_v1_mode(sd_v1, inode->i_mode );
    set_sd_v1_uid(sd_v1, inode->i_uid );
    set_sd_v1_gid(sd_v1, inode->i_gid );
    set_sd_v1_nlink(sd_v1, inode->i_nlink );
    set_sd_v1_size(sd_v1, inode->i_size );
    set_sd_v1_atime(sd_v1, inode->i_atime );
    set_sd_v1_ctime(sd_v1, inode->i_ctime );
    set_sd_v1_mtime(sd_v1, inode->i_mtime );

    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
        set_sd_v1_rdev(sd_v1, inode->i_rdev );
    else
        set_sd_v1_blocks(sd_v1, inode->i_blocks );

    // Sigh. i_first_direct_byte is back
    set_sd_v1_first_direct_byte(sd_v1, inode->u.reiserfs_i.i_first_direct_byte);
}
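
/* Note (inferred from the if/else pairs above): in both stat data formats
** rdev shares on-disk storage with a field that device nodes never use --
** the generation number in v2 and the block count in v1 -- so only one of
** each pair is written, depending on the file type.
*/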


/* NOTE, you must prepare the buffer head before sending it here,
** and then log it after the call
*/
static void update_stat_data (struct path * path, struct inode * inode)
{
    struct buffer_head * bh;
    struct item_head * ih;

    bh = PATH_PLAST_BUFFER (path);
    ih = PATH_PITEM_HEAD (path);

    if (!is_statdata_le_ih (ih))
        reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h",
                        INODE_PKEY (inode), ih);

    if (stat_data_v1 (ih)) {
        // path points to old stat data
        inode2sd_v1 (B_I_PITEM (bh, ih), inode);
    } else {
        inode2sd (B_I_PITEM (bh, ih), inode);
    }

    return;
}


void reiserfs_update_sd (struct reiserfs_transaction_handle *th,
                         struct inode * inode)
{
    struct cpu_key key;
    INITIALIZE_PATH(path);
    struct buffer_head *bh ;
    int fs_gen ;
    struct item_head *ih, tmp_ih ;
    int retval;

    make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant

    for(;;) {
        int pos;
        /* look for the object's stat data */
        retval = search_item (inode->i_sb, &key, &path);
        if (retval == IO_ERROR) {
            reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: "
                              "i/o failure occurred trying to update %K stat data\n",
                              &key);
            return;
        }
        if (retval == ITEM_NOT_FOUND) {
            pos = PATH_LAST_POSITION (&path);
            pathrelse(&path) ;
            if (inode->i_nlink == 0) {
                /*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/
                return;
            }
            reiserfs_warning (inode->i_sb, "vs-13060: reiserfs_update_sd: "
                              "stat data of object %k (nlink == %d) not found (pos %d)\n",
                              INODE_PKEY (inode), inode->i_nlink, pos);
            reiserfs_check_path(&path) ;
            return;
        }

        /* sigh, prepare_for_journal might schedule.  When it schedules the
        ** FS might change.  We have to detect that, and loop back to the
        ** search if the stat data item has moved
        */
        bh = get_last_bh(&path) ;
        ih = get_ih(&path) ;
        copy_item_head (&tmp_ih, ih);
        fs_gen = get_generation (inode->i_sb);
        reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
        if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
            reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
            continue ; /* Stat_data item has been moved after scheduling. */
        }
        break;
    }
    update_stat_data (&path, inode);
    journal_mark_dirty(th, th->t_super, bh) ;
    pathrelse (&path);
    return;
}

/* We need to clear inode key in private part of inode to avoid races between
   blocking iput, knfsd and file deletion with creating of safelinks.*/
static void reiserfs_make_bad_inode(struct inode *inode) {
    memset(INODE_PKEY(inode), 0, KEY_SIZE);
    make_bad_inode(inode);
}

void reiserfs_read_inode(struct inode *inode) {
    reiserfs_make_bad_inode(inode) ;
}
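
/* ->read_inode above is only a stub that marks the inode bad: reiserfs
** cannot locate a stat data item from the object id alone, it also needs
** the parent directory id.  Real initialization therefore happens in
** reiserfs_read_inode2(), invoked through iget4() with reiserfs_iget4_args
** carrying the directory id (see reiserfs_iget below).
*/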


/* looks for stat data in the tree, and fills up the fields of in-core
   inode stat data fields */
void reiserfs_read_inode2 (struct inode * inode, void *p)
{
    INITIALIZE_PATH (path_to_sd);
    struct cpu_key key;
    struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ;
    unsigned long dirino;
    int retval;

    if (!p) {
        reiserfs_make_bad_inode(inode) ;
        return;
    }

    dirino = args->objectid ;

    /* set version 1, version 2 could be used too, because stat data
       key is the same in both versions */
    key.version = KEY_FORMAT_3_5;
    key.on_disk_key.k_dir_id = dirino;
    key.on_disk_key.k_objectid = inode->i_ino;
    key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
    key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;

    /* look for the object's stat data */
    retval = search_item (inode->i_sb, &key, &path_to_sd);
    if (retval == IO_ERROR) {
        reiserfs_warning (inode->i_sb, "vs-13070: reiserfs_read_inode2: "
                          "i/o failure occurred trying to find stat data of %K\n",
                          &key);
        reiserfs_make_bad_inode(inode) ;
        return;
    }
    if (retval != ITEM_FOUND) {
        /* a stale NFS handle can trigger this without it being an error */
        pathrelse (&path_to_sd);
        reiserfs_make_bad_inode(inode) ;
        inode->i_nlink = 0;
        return;
    }

    init_inode (inode, &path_to_sd);

    /* It is possible that knfsd is trying to access an inode of a file
       that is being removed from the disk by some other thread.  As we
       update the sd on unlink, all that is required is to check for nlink
       here.  This bug was first found by Sizif when debugging
       SquidNG/Butterfly, forgotten, and found again after Philippe
       Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

       A more logical fix would require changes in fs/inode.c:iput() to
       remove the inode from the hash-table _after_ the fs cleaned disk
       stuff up, and in iget() to return NULL if an I_FREEING inode is
       found in the hash-table. */
    /* Currently there is one place where it's ok to meet an inode with
       nlink==0: processing of open-unlinked and half-truncated files
       during mount (fs/reiserfs/super.c:finish_unfinished()). */
    if( ( inode -> i_nlink == 0 ) &&
        ! inode -> i_sb -> u.reiserfs_sb.s_is_unlinked_ok ) {
            reiserfs_warning( inode->i_sb, "vs-13075: reiserfs_read_inode2: "
                              "dead inode read from disk %K. "
                              "This is likely to be a race with knfsd. Ignore\n",
                              &key );
            reiserfs_make_bad_inode( inode );
    }

    reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */

}

/**
 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget4().
 *
 * @inode: inode from hash table to check
 * @inode_no: inode number we are looking for
 * @opaque: "cookie" passed to iget4(). This is &reiserfs_iget4_args.
 *
 * This function is called by iget4() to distinguish reiserfs inodes
 * having the same inode numbers. Such inodes can only exist due to some
 * error condition. One of them should be bad. Inodes with identical
 * inode numbers (objectids) are distinguished by parent directory ids.
 *
 */
static int reiserfs_find_actor( struct inode *inode,
                                unsigned long inode_no, void *opaque )
{
    struct reiserfs_iget4_args *args;
    int retval;

    args = opaque;
    /* We protect against possible parallel init_inode() on another CPU here. */
    spin_lock(&keycopy_lock);
    /* args is already in CPU order */
    if (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args -> objectid)
        retval = 1;
    else
        /* If the key does not match, let's see if we are racing
           with another iget4 that already progressed as far as
           reiserfs_read_inode2() and was preempted in the
           call to search_by_key().  The signs of that are:
             the inode is locked;
             dirid and objectid are zero (not yet initialized). */
        retval = (inode->i_state & I_LOCK) &&
                 !INODE_PKEY(inode)->k_dir_id &&
                 !INODE_PKEY(inode)->k_objectid;

    spin_unlock(&keycopy_lock);
    return retval;
}

struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key)
{
    struct inode * inode;
    struct reiserfs_iget4_args args ;

    args.objectid = key->on_disk_key.k_dir_id ;
    inode = iget4 (s, key->on_disk_key.k_objectid,
                   reiserfs_find_actor, (void *)(&args));
    if (!inode)
        return ERR_PTR(-ENOMEM) ;

    if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) {
        /* either due to i/o error or a stale NFS handle */
        iput (inode);
        inode = 0;
    }
    return inode;
}

struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, __u32 *data,
                                     int len, int fhtype, int parent) {
    struct cpu_key key ;
    struct inode *inode = NULL ;
    struct list_head *lp;
    struct dentry *result;

    /* fhtype happens to reflect the number of u32s encoded.
     * due to a bug in earlier code, fhtype might indicate there
     * are more u32s than actually fit.
     * so if fhtype seems to be more than len, reduce fhtype.
     * Valid types are:
     *   2 - objectid + dir_id - legacy support
     *   3 - objectid + dir_id + generation
     *   4 - objectid + dir_id + objectid and dirid of parent - legacy
     *   5 - objectid + dir_id + generation + objectid and dirid of parent
     *   6 - as above plus generation of directory
     * 6 does not fit in NFSv2 handles
     */
    if (fhtype > len) {
        if (fhtype != 6 || len != 5)
            reiserfs_warning(sb, "nfsd/reiserfs, fhtype=%d, len=%d - odd\n",
                             fhtype, len);
        fhtype = 5;
    }
    if (fhtype < 2 || (parent && fhtype < 4))
        goto out ;

    if (! parent) {
        /* this works for handles from old kernels because the default
        ** reiserfs generation number is the packing locality.
        */
        key.on_disk_key.k_objectid = data[0] ;
        key.on_disk_key.k_dir_id = data[1] ;
        inode = reiserfs_iget(sb, &key) ;
        if (inode && !IS_ERR(inode) && (fhtype == 3 || fhtype >= 5) &&
            data[2] != inode->i_generation) {
            iput(inode) ;
            inode = NULL ;
        }
    } else {
        key.on_disk_key.k_objectid = data[fhtype>=5?3:2] ;
        key.on_disk_key.k_dir_id = data[fhtype>=5?4:3] ;
        inode = reiserfs_iget(sb, &key) ;
        if (inode && !IS_ERR(inode) && fhtype == 6 &&
            data[5] != inode->i_generation) {
            iput(inode) ;
            inode = NULL ;
        }
    }
out:
    if (IS_ERR(inode))
        return ERR_PTR(PTR_ERR(inode));
    if (!inode)
        return ERR_PTR(-ESTALE) ;

    /* now to find a dentry.
     * If possible, get a well-connected one
     */
    spin_lock(&dcache_lock);
    for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
        result = list_entry(lp,struct dentry, d_alias);
        if (! (result->d_flags & DCACHE_NFSD_DISCONNECTED)) {
            dget_locked(result);
            result->d_vfs_flags |= DCACHE_REFERENCED;
            spin_unlock(&dcache_lock);
            iput(inode);
            return result;
        }
    }
    spin_unlock(&dcache_lock);
    result = d_alloc_root(inode);
    if (result == NULL) {
        iput(inode);
        return ERR_PTR(-ENOMEM);
    }
    result->d_flags |= DCACHE_NFSD_DISCONNECTED;
    return result;

}

int reiserfs_dentry_to_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) {
    struct inode *inode = dentry->d_inode ;
    int maxlen = *lenp;

    if (maxlen < 3)
        return 255 ;

    data[0] = inode->i_ino ;
    data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
    data[2] = inode->i_generation ;
    *lenp = 3 ;
    /* no room for directory info? return what we've stored so far */
    if (maxlen < 5 || ! need_parent)
        return 3 ;

    inode = dentry->d_parent->d_inode ;
    data[3] = inode->i_ino ;
    data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
    *lenp = 5 ;
    if (maxlen < 6)
        return 5 ;
    data[5] = inode->i_generation ;
    *lenp = 6 ;
    return 6 ;
}
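
/* For reference, a full-length (fhtype 6) handle produced above looks like
**
**     data[] = { objectid, dir_id, generation,
**                parent objectid, parent dir_id, parent generation }
**
** which matches the fhtype table documented in reiserfs_fh_to_dentry().
*/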


/* looks for stat data, then copies fields to it, marks the buffer
   containing stat data as dirty */
/* reiserfs inodes are never really dirty, since the dirty inode call
** always logs them.  This call allows the VFS inode marking routines
** to properly mark inodes for datasync and such, but only actually
** does something when called for a synchronous update.
*/
void reiserfs_write_inode (struct inode * inode, int do_sync) {
    struct reiserfs_transaction_handle th ;
    int jbegin_count = 1 ;

    if (inode->i_sb->s_flags & MS_RDONLY) {
        reiserfs_warning(inode->i_sb, "clm-6005: writing inode %lu on readonly FS\n",
                         inode->i_ino) ;
        return ;
    }
    /* memory pressure can sometimes initiate write_inode calls with sync == 1,
    ** these cases are just when the system needs ram, not when the
    ** inode needs to reach disk for safety, and they can safely be
    ** ignored because the altered inode has already been logged.
    */
    if (do_sync && !(current->flags & PF_MEMALLOC)) {
        lock_kernel() ;
        journal_begin(&th, inode->i_sb, jbegin_count) ;
        reiserfs_update_sd (&th, inode);
        journal_end_sync(&th, inode->i_sb, jbegin_count) ;
        unlock_kernel() ;
    }
}

/* FIXME: no need any more. right? */
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode)
{
    int err = 0;

    reiserfs_update_sd (th, inode);
    return err;
}


/* stat data of new object is inserted already, this inserts the item
   containing "." and ".." entries */
static int reiserfs_new_directory (struct reiserfs_transaction_handle *th,
                                   struct item_head * ih, struct path * path,
                                   const struct inode * dir)
{
    struct super_block * sb = th->t_super;
    char empty_dir [EMPTY_DIR_SIZE];
    char * body = empty_dir;
    struct cpu_key key;
    int retval;

    _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id),
                   le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/);

    /* compose item head for new item. Directories consist of items of
       old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
       is done by reiserfs_new_inode */
    if (old_format_only (sb)) {
        make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);

        make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
                                INODE_PKEY (dir)->k_dir_id,
                                INODE_PKEY (dir)->k_objectid );
    } else {
        make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);

        make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
                             INODE_PKEY (dir)->k_dir_id,
                             INODE_PKEY (dir)->k_objectid );
    }

    /* look for place in the tree for new item */
    retval = search_item (sb, &key, path);
    if (retval == IO_ERROR) {
        reiserfs_warning (sb, "vs-13080: reiserfs_new_directory: "
                          "i/o failure occurred creating new directory\n");
        return -EIO;
    }
    if (retval == ITEM_FOUND) {
        pathrelse (path);
        reiserfs_warning (sb, "vs-13070: reiserfs_new_directory: "
                          "object with this key exists (%k)\n", &(ih->ih_key));
        return -EEXIST;
    }

    /* insert item, that is empty directory item */
    return reiserfs_insert_item (th, path, &key, ih, body);
}


/* stat data of object has been inserted, this inserts the item
   containing the body of symlink */
static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
                                 struct item_head * ih,
                                 struct path * path, const char * symname, int item_len)
{
    struct super_block * sb = th->t_super;
    struct cpu_key key;
    int retval;

    _make_cpu_key (&key, KEY_FORMAT_3_5,
                   le32_to_cpu (ih->ih_key.k_dir_id),
                   le32_to_cpu (ih->ih_key.k_objectid),
                   1, TYPE_DIRECT, 3/*key length*/);

    make_le_item_head (ih, 0, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/);

    /* look for place in the tree for new item */
    retval = search_item (sb, &key, path);
    if (retval == IO_ERROR) {
        reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: "
                          "i/o failure occurred creating new symlink\n");
        return -EIO;
    }
    if (retval == ITEM_FOUND) {
        pathrelse (path);
        reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: "
                          "object with this key exists (%k)\n", &(ih->ih_key));
        return -EEXIST;
    }

    /* insert item, that is body of symlink */
    return reiserfs_insert_item (th, path, &key, ih, symname);
}
1535
1536
1537 /* inserts the stat data into the tree, and then calls
1538 reiserfs_new_directory (to insert ".", ".." item if new object is
1539 directory) or reiserfs_new_symlink (to insert symlink body if new
1540 object is symlink) or nothing (if new object is regular file)
1541
1542 NOTE! uid and gid must already be set in the inode. If we return
1543 non-zero due to an error, we have to drop the quota previously allocated
1544 for the fresh inode. This can only be done outside a transaction, so
1545 if we return non-zero, we also end the transaction.
1546
1547 */
reiserfs_new_inode(struct reiserfs_transaction_handle * th,struct inode * dir,int mode,const char * symname,int i_size,struct dentry * dentry,struct inode * inode)1548 int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
1549 struct inode * dir, int mode,
1550 const char * symname,
1551 /* 0 for regular, EMTRY_DIR_SIZE for dirs,
1552 strlen (symname) for symlinks) */
1553 int i_size,
1554 struct dentry *dentry,
1555 struct inode *inode)
1556 {
1557 struct super_block * sb;
1558 INITIALIZE_PATH (path_to_key);
1559 struct cpu_key key;
1560 struct item_head ih;
1561 struct stat_data sd;
1562 int retval;
1563 int err ;
1564
1565 if (!dir || !dir->i_nlink) {
1566 err = -EPERM ;
1567 goto out_bad_inode ;
1568 }
1569
1570 sb = dir->i_sb;
1571 inode -> u.reiserfs_i.i_attrs =
1572 dir -> u.reiserfs_i.i_attrs & REISERFS_INHERIT_MASK;
1573 sd_attrs_to_i_attrs( inode -> u.reiserfs_i.i_attrs, inode );
1574
1575 /* symlink cannot be immutable or append only, right? */
1576 if( S_ISLNK( inode -> i_mode ) )
1577 inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND );
1578
1579 /* item head of new item */
1580 ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
1581 ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
1582 if (!ih.ih_key.k_objectid) {
1583 err = -ENOMEM ;
1584 goto out_bad_inode ;
1585 }
1586 if (old_format_only (sb))
1587 /* not a perfect generation count, as object ids can be reused, but this
1588 ** is as good as reiserfs can do right now.
1589 ** note that the private part of inode isn't filled in yet, we have
1590 ** to use the directory.
1591 */
1592 inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid);
1593 else
1594 #if defined( USE_INODE_GENERATION_COUNTER )
1595 inode->i_generation =
1596 le32_to_cpu( sb -> u.reiserfs_sb.s_rs -> s_inode_generation );
1597 #else
1598 inode->i_generation = ++event;
1599 #endif
1600 /* fill stat data */
1601 inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);
1602
1603 /* uid and gid must already be set by the caller for quota init */
1604
1605 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1606 inode->i_size = i_size;
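    /* i_blocks counts 512-byte sectors, rounded up: e.g. i_size = 10000
    ** gives (10000 + 511) >> 9 = 20 sectors */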
1607 inode->i_blocks = (inode->i_size + 511) >> 9;
1608 inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 1 :
1609 U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/;
1610
1611 INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
1612
1613 if (old_format_only (sb))
1614 make_le_item_head (&ih, 0, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1615 else
1616 make_le_item_head (&ih, 0, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1617
1618 /* key to search for correct place for new stat data */
1619 _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
1620 le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
1621
1622 /* find proper place for inserting of stat data */
1623 retval = search_item (sb, &key, &path_to_key);
1624 if (retval == IO_ERROR) {
1625 err = -EIO;
1626 goto out_bad_inode;
1627 }
1628 if (retval == ITEM_FOUND) {
1629 pathrelse (&path_to_key);
1630 err = -EEXIST;
1631 goto out_bad_inode;
1632 }
1633
1634 if (old_format_only (sb)) {
1635 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1636 pathrelse (&path_to_key);
1637 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1638 err = -EINVAL;
1639 goto out_bad_inode;
1640 }
1641 inode2sd_v1 (&sd, inode);
1642 } else
1643 inode2sd (&sd, inode);
1644
1645 // these do not go to on-disk stat data
1646 inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
1647 inode->i_blksize = PAGE_SIZE;
1648 inode->i_dev = sb->s_dev;
1649
1650     // store in the in-core inode the key of the stat data and the item
1651     // version all object items will have (directory items keep the old
1652     // offset format; other new objects use new-format items)
1653 memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
1654 if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode))
1655 set_inode_item_key_version (inode, KEY_FORMAT_3_5);
1656 else
1657 set_inode_item_key_version (inode, KEY_FORMAT_3_6);
1658 if (old_format_only (sb))
1659 set_inode_sd_version (inode, STAT_DATA_V1);
1660 else
1661 set_inode_sd_version (inode, STAT_DATA_V2);
1662
1663 /* insert the stat data into the tree */
1664 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1665 if (dir->u.reiserfs_i.new_packing_locality)
1666 th->displace_new_blocks = 1;
1667 #endif
1668 retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
1669 if (retval) {
1670 reiserfs_check_path(&path_to_key) ;
1671 err = retval;
1672 goto out_bad_inode;
1673 }
1674
1675 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1676 if (!th->displace_new_blocks)
1677 dir->u.reiserfs_i.new_packing_locality = 0;
1678 #endif
1679 if (S_ISDIR(mode)) {
1680 /* insert item with "." and ".." */
1681 retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);
1682 }
1683
1684 if (S_ISLNK(mode)) {
1685 /* insert body of symlink */
1686 if (!old_format_only (sb))
1687 i_size = ROUND_UP(i_size);
1688 retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
1689 }
1690 if (retval) {
1691 err = retval;
1692 reiserfs_check_path(&path_to_key) ;
1693 journal_end(th, th->t_super, th->t_blocks_allocated) ;
1694 goto out_inserted_sd;
1695 }
1696
1697 insert_inode_hash (inode);
1698 reiserfs_update_sd(th, inode) ;
1699 reiserfs_check_path(&path_to_key) ;
1700
1701 return 0;
1702 out_bad_inode:
1703 /* Invalidate the object, nothing was inserted yet */
1704 INODE_PKEY(inode)->k_objectid = 0;
1705
1706 /* dquot_drop must be done outside a transaction */
1707 journal_end(th, th->t_super, th->t_blocks_allocated) ;
1708 make_bad_inode(inode);
1709
1710 out_inserted_sd:
1711 inode->i_nlink = 0;
1712 th->t_trans_id = 0 ; /* so the caller can't use this handle later */
1713 iput(inode) ;
1714 return err;
1715 }
1716
1717 /*
1718 ** finds the tail page in the page cache,
1719 ** reads the last block in.
1720 **
1721 ** On success, page_result is set to a locked, pinned page, and bh_result
1722 ** is set to an up to date buffer for the last block in the file. returns 0.
1723 **
1724 ** tail conversion is not done, so bh_result might not be valid for writing.
1725 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1726 ** trying to write the block.
1727 **
1728 ** on failure, nonzero is returned, page_result and bh_result are untouched.
1729 */
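/* An illustrative use of the contract above (a sketch, not a verbatim
** caller; reiserfs_truncate_file below does the real work):
**
**	struct page *page ;
**	struct buffer_head *bh ;
**	if (grab_tail_page (inode, &page, &bh) == 0) {
**		if (buffer_mapped (bh) && bh->b_blocknr != 0) {
**			// unformatted node: safe to dirty and write the block
**		}
**		UnlockPage (page) ;
**		page_cache_release (page) ;
**	}
*/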
1730 static int grab_tail_page(struct inode *p_s_inode,
1731 struct page **page_result,
1732 struct buffer_head **bh_result) {
1733
1734 /* we want the page with the last byte in the file,
1735 ** not the page that will hold the next byte for appending
1736 */
1737 unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ;
1738 unsigned long pos = 0 ;
1739 unsigned long start = 0 ;
1740 unsigned long blocksize = p_s_inode->i_sb->s_blocksize ;
1741 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ;
1742 struct buffer_head *bh ;
1743 struct buffer_head *head ;
1744 struct page * page ;
1745 int error ;
1746
1747 /* we know that we are only called with inode->i_size > 0.
1748 ** we also know that a file tail can never be as big as a block.
1749 ** If i_size % blocksize == 0, our file is currently block aligned
1750 ** and it won't need converting or zeroing after a truncate.
1751 */
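    /* worked example (assuming PAGE_CACHE_SIZE 4096, blocksize 1024):
    ** i_size = 10000 gives index = 9999 >> 12 = 2, offset = 10000 & 4095 = 1808,
    ** start = (1808 / 1024) * 1024 = 1024; the tail occupies 1808 & 1023 = 784
    ** bytes of that block, so the alignment check below does not fire.
    */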
1752 if ((offset & (blocksize - 1)) == 0) {
1753 return -ENOENT ;
1754 }
1755 page = grab_cache_page(p_s_inode->i_mapping, index) ;
1756 error = -ENOMEM ;
1757 if (!page) {
1758 goto out ;
1759 }
1760 /* start within the page of the last block in the file */
1761 start = (offset / blocksize) * blocksize ;
1762
1763 error = block_prepare_write(page, start, offset,
1764 reiserfs_get_block_create_0) ;
1765 if (error)
1766 goto unlock ;
1767
1768 kunmap(page) ; /* mapped by block_prepare_write */
1769
1770 head = page->buffers ;
1771 bh = head;
1772 do {
1773 if (pos >= start) {
1774 break ;
1775 }
1776 bh = bh->b_this_page ;
1777 pos += blocksize ;
1778 } while(bh != head) ;
1779
1780 if (!buffer_uptodate(bh)) {
1781 /* note, this should never happen, prepare_write should
1782 ** be taking care of this for us. If the buffer isn't up to date,
1783 ** I've screwed up the code to find the buffer, or the code to
1784 ** call prepare_write
1785 */
1786 reiserfs_warning(p_s_inode->i_sb, "clm-6000: error reading block %lu\n",
1787 bh->b_blocknr) ;
1788 error = -EIO ;
1789 goto unlock ;
1790 }
1791 *bh_result = bh ;
1792 *page_result = page ;
1793
1794 out:
1795 return error ;
1796
1797 unlock:
1798 UnlockPage(page) ;
1799 page_cache_release(page) ;
1800 return error ;
1801 }
1802
1803 /*
1804 ** vfs version of truncate file. Must NOT be called with
1805 ** a transaction already started.
1806 **
1807 ** some code taken from block_truncate_page
1808 */
1809 void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
1810 struct reiserfs_transaction_handle th ;
1811 int windex ;
1812
1813 /* we want the offset for the first byte after the end of the file */
1814 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ;
1815 unsigned blocksize = p_s_inode->i_sb->s_blocksize ;
1816 unsigned length ;
1817 struct page *page = NULL ;
1818 int error ;
1819 struct buffer_head *bh = NULL ;
1820
1821 if (p_s_inode->i_size > 0) {
1822 if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
1823 // -ENOENT means we truncated past the end of the file,
1824 // and get_block_create_0 could not find a block to read in,
1825 // which is ok.
1826 if (error != -ENOENT)
1827 reiserfs_warning(p_s_inode->i_sb, "clm-6001: grab_tail_page failed %d\n", error);
1828 page = NULL ;
1829 bh = NULL ;
1830 }
1831 }
1832
1833 /* so, if page != NULL, we have a buffer head for the offset at
1834 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
1835 ** then we have an unformatted node. Otherwise, we have a direct item,
1836 ** and no zeroing is required on disk. We zero after the truncate,
1837 ** because the truncate might pack the item anyway
1838 ** (it will unmap bh if it packs).
1839 */
1840 /* it is enough to reserve space in transaction for 2 balancings:
1841 one for "save" link adding and another for the first
1842 cut_from_item. 1 is for update_sd */
1843 journal_begin(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
1844 reiserfs_update_inode_transaction(p_s_inode) ;
1845 windex = push_journal_writer("reiserfs_vfs_truncate_file") ;
1846 if (update_timestamps)
1847 /* we are doing real truncate: if the system crashes before the last
1848 transaction of truncating gets committed - on reboot the file
1849 either appears truncated properly or not truncated at all */
1850 add_save_link (&th, p_s_inode, 1);
1851 reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
1852 pop_journal_writer(windex) ;
1853 journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
1854
1855 if (update_timestamps)
1856 remove_save_link (p_s_inode, 1/* truncate */);
1857
1858 if (page) {
1859 length = offset & (blocksize - 1) ;
1860 /* if we are not on a block boundary */
1861 if (length) {
1862 length = blocksize - length ;
1863 memset((char *)kmap(page) + offset, 0, length) ;
1864 flush_dcache_page(page) ;
1865 kunmap(page) ;
1866 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1867 if (!atomic_set_buffer_dirty(bh)) {
1868 set_buffer_flushtime(bh);
1869 refile_buffer(bh);
1870 buffer_insert_inode_data_queue(bh, p_s_inode);
1871 balance_dirty();
1872 }
1873 }
1874 }
1875 UnlockPage(page) ;
1876 page_cache_release(page) ;
1877 }
1878
1879 return ;
1880 }
1881
1882 static int map_block_for_writepage(struct inode *inode,
1883 struct buffer_head *bh_result,
1884 unsigned long block) {
1885 struct reiserfs_transaction_handle th ;
1886 int fs_gen ;
1887 struct item_head tmp_ih ;
1888 struct item_head *ih ;
1889 struct buffer_head *bh ;
1890 __u32 *item ;
1891 struct cpu_key key ;
1892 INITIALIZE_PATH(path) ;
1893 int pos_in_item ;
1894 int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
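    /* reiserfs item keys use 1-based byte offsets, hence the + 1 below:
    ** logical block N of a file starts at key offset N * blocksize + 1
    ** (e.g. with 4k blocks, block 0 is offset 1 and block 1 is offset 4097) */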
1895 loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
1896 int retval ;
1897 int use_get_block = 0 ;
1898 int bytes_copied = 0 ;
1899 int copy_size ;
1900
1901 kmap(bh_result->b_page) ;
1902 start_over:
1903 lock_kernel() ;
1904 journal_begin(&th, inode->i_sb, jbegin_count) ;
1905 reiserfs_update_inode_transaction(inode) ;
1906
1907 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
1908
1909 research:
1910 retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
1911 if (retval != POSITION_FOUND) {
1912 use_get_block = 1;
1913 goto out ;
1914 }
1915
1916 bh = get_last_bh(&path) ;
1917 ih = get_ih(&path) ;
1918 item = get_item(&path) ;
1919 pos_in_item = path.pos_in_item ;
1920
1921 /* we've found an unformatted node */
1922 if (indirect_item_found(retval, ih)) {
1923 if (bytes_copied > 0) {
1924 reiserfs_warning(inode->i_sb, "clm-6002: bytes_copied %d\n", bytes_copied) ;
1925 }
1926 if (!get_block_num(item, pos_in_item)) {
1927 /* crap, we are writing to a hole */
1928 use_get_block = 1;
1929 goto out ;
1930 }
1931 set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
1932 mark_buffer_uptodate(bh_result, 1);
1933 } else if (is_direct_le_ih(ih)) {
1934 char *p ;
1935 p = page_address(bh_result->b_page) ;
1936 p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
1937 copy_size = ih_item_len(ih) - pos_in_item;
1938
1939 fs_gen = get_generation(inode->i_sb) ;
1940 copy_item_head(&tmp_ih, ih) ;
1941 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
1942 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
1943 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
1944 goto research;
1945 }
1946
1947 memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
1948
1949 journal_mark_dirty(&th, inode->i_sb, bh) ;
1950 bytes_copied += copy_size ;
1951 set_block_dev_mapped(bh_result, 0, inode);
1952 mark_buffer_uptodate(bh_result, 1);
1953
1954 /* are there still bytes left? */
1955 if (bytes_copied < bh_result->b_size &&
1956 (byte_offset + bytes_copied) < inode->i_size) {
1957 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
1958 goto research ;
1959 }
1960 } else {
1961 reiserfs_warning(inode->i_sb, "clm-6003: bad item inode %lu\n", inode->i_ino) ;
1962 retval = -EIO ;
1963 goto out ;
1964 }
1965 retval = 0 ;
1966
1967 out:
1968 pathrelse(&path) ;
1969 journal_end(&th, inode->i_sb, jbegin_count) ;
1970 unlock_kernel() ;
1971
1972 /* this is where we fill in holes in the file. */
1973 if (use_get_block) {
1974 retval = reiserfs_get_block(inode, block, bh_result,
1975 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ;
1976 if (!retval) {
1977 if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
1978 /* get_block failed to find a mapped unformatted node. */
1979 use_get_block = 0 ;
1980 goto start_over ;
1981 }
1982 }
1983 }
1984 kunmap(bh_result->b_page) ;
1985 return retval ;
1986 }
1987
1988 /* helper func to get a buffer head ready for writepage to send to
1989 ** ll_rw_block
1990 */
1991 static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) {
1992 struct buffer_head *bh ;
1993 int i;
1994
1995 /* lock them all first so the end_io handler doesn't unlock the page
1996 ** too early
1997 */
1998 for(i = 0 ; i < nr ; i++) {
1999 bh = bhp[i] ;
2000 lock_buffer(bh) ;
2001 set_buffer_async_io(bh) ;
2002 }
2003 for(i = 0 ; i < nr ; i++) {
2004 /* submit_bh doesn't care if the buffer is dirty, but nobody
2005 ** later on in the call chain will be cleaning it. So, we
2006 ** clean the buffer here, it still gets written either way.
2007 */
2008 bh = bhp[i] ;
2009 clear_bit(BH_Dirty, &bh->b_state) ;
2010 set_bit(BH_Uptodate, &bh->b_state) ;
2011 submit_bh(WRITE, bh) ;
2012 }
2013 }
2014
2015 static int reiserfs_write_full_page(struct page *page) {
2016 struct inode *inode = page->mapping->host ;
2017 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
2018 unsigned last_offset = PAGE_CACHE_SIZE;
2019 int error = 0;
2020 unsigned long block ;
2021 unsigned cur_offset = 0 ;
2022 struct buffer_head *head, *bh ;
2023 int partial = 0 ;
2024 struct buffer_head *arr[PAGE_CACHE_SIZE/512] ;
2025 int nr = 0 ;
2026
2027 if (!page->buffers) {
2028 block_prepare_write(page, 0, 0, NULL) ;
2029 kunmap(page) ;
2030 }
2031 /* last page in the file, zero out any contents past the
2032 ** last byte in the file
2033 */
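    /* worked example (assuming 4k pages): i_size = 10000 gives end_index = 2,
    ** so for page index 2 last_offset becomes 10000 & 4095 = 1808 and bytes
    ** 1808..4095 of the page are zeroed before writeback */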
2034 if (page->index >= end_index) {
2035 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
2036 /* no file contents in this page */
2037 if (page->index >= end_index + 1 || !last_offset) {
2038 error = -EIO ;
2039 goto fail ;
2040 }
2041 memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
2042 flush_dcache_page(page) ;
2043 kunmap(page) ;
2044 }
2045 head = page->buffers ;
2046 bh = head ;
2047 block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ;
2048 do {
2049 /* if this offset in the page is outside the file */
2050 if (cur_offset >= last_offset) {
2051 if (!buffer_uptodate(bh))
2052 partial = 1 ;
2053 } else {
2054 /* fast path, buffer mapped to an unformatted node */
2055 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2056 arr[nr++] = bh ;
2057 } else {
2058 /* buffer not mapped yet, or points to a direct item.
2059 	    ** search the tree and either map the block or log the direct item
2060 */
2061 if ((error = map_block_for_writepage(inode, bh, block))) {
2062 goto fail ;
2063 }
2064 /* map_block_for_writepage either found an unformatted node
2065 ** and mapped it for us, or it found a direct item
2066 ** and logged the changes.
2067 */
2068 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2069 arr[nr++] = bh ;
2070 }
2071 }
2072 }
2073 bh = bh->b_this_page ;
2074 cur_offset += bh->b_size ;
2075 block++ ;
2076 } while(bh != head) ;
2077
2078 /* if this page only had a direct item, it is very possible for
2079 ** nr == 0 without there being any kind of error.
2080 */
2081 if (nr) {
2082 submit_bh_for_writepage(arr, nr) ;
2083 wakeup_page_waiters(page);
2084 } else {
2085 UnlockPage(page) ;
2086 }
2087 if (!partial)
2088 SetPageUptodate(page) ;
2089
2090 return 0 ;
2091
2092 fail:
2093 if (nr) {
2094 submit_bh_for_writepage(arr, nr) ;
2095 } else {
2096 UnlockPage(page) ;
2097 }
2098 ClearPageUptodate(page) ;
2099 return error ;
2100 }
2101
2102
2103 static int reiserfs_readpage (struct file *f, struct page * page)
2104 {
2105 return block_read_full_page (page, reiserfs_get_block);
2106 }
2107
2108
2109 static int reiserfs_writepage (struct page * page)
2110 {
2111 struct inode *inode = page->mapping->host ;
2112 reiserfs_wait_on_write_block(inode->i_sb) ;
2113 return reiserfs_write_full_page(page) ;
2114 }
2115
2116
2117 int reiserfs_prepare_write(struct file *f, struct page *page,
2118 unsigned from, unsigned to) {
2119 struct inode *inode = page->mapping->host ;
2120 reiserfs_wait_on_write_block(inode->i_sb) ;
2121 fix_tail_page_for_writing(page) ;
2122 return block_prepare_write(page, from, to, reiserfs_get_block) ;
2123 }
2124
2125
2126 static int reiserfs_aop_bmap(struct address_space *as, long block) {
2127 return generic_block_bmap(as, block, reiserfs_bmap) ;
2128 }
2129
2130 static int reiserfs_commit_write(struct file *f, struct page *page,
2131 unsigned from, unsigned to) {
2132 struct inode *inode = page->mapping->host ;
2133 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2134 int ret ;
2135
2136 reiserfs_wait_on_write_block(inode->i_sb) ;
2137
2138 /* generic_commit_write does this for us, but does not update the
2139 ** transaction tracking stuff when the size changes. So, we have
2140 ** to do the i_size updates here.
2141 */
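    /* worked example (assuming 4k pages): a write ending at byte 100 of page
    ** index 3 yields pos = (3 << 12) + 100 = 12388; this path only ever moves
    ** i_size forward, never back */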
2142 if (pos > inode->i_size) {
2143 struct reiserfs_transaction_handle th ;
2144 lock_kernel();
2145 	/* If the file has grown beyond the boundary where it
2146 	   can have a tail, unmark it as needing tail
2147 	   packing */
2148 if ( (have_large_tails (inode->i_sb) && inode->i_size > block_size (inode)*4) ||
2149 (have_small_tails (inode->i_sb) && inode->i_size > block_size(inode)) )
2150 inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
2151
2152 journal_begin(&th, inode->i_sb, 1) ;
2153 reiserfs_update_inode_transaction(inode) ;
2154 inode->i_size = pos ;
2155 reiserfs_update_sd(&th, inode) ;
2156 journal_end(&th, inode->i_sb, 1) ;
2157 unlock_kernel();
2158 }
2159
2160 ret = generic_commit_write(f, page, from, to) ;
2161
2162 /* we test for O_SYNC here so we can commit the transaction
2163 ** for any packed tails the file might have had
2164 */
2165 if (f && (f->f_flags & O_SYNC)) {
2166 lock_kernel() ;
2167 reiserfs_commit_for_inode(inode) ;
2168 unlock_kernel();
2169 }
2170 return ret ;
2171 }
2172
2173 void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode )
2174 {
2175 if( reiserfs_attrs( inode -> i_sb ) ) {
2176 if( sd_attrs & REISERFS_SYNC_FL )
2177 inode -> i_flags |= S_SYNC;
2178 else
2179 inode -> i_flags &= ~S_SYNC;
2180 if( sd_attrs & REISERFS_IMMUTABLE_FL )
2181 inode -> i_flags |= S_IMMUTABLE;
2182 else
2183 inode -> i_flags &= ~S_IMMUTABLE;
2184 if( sd_attrs & REISERFS_APPEND_FL )
2185 inode -> i_flags |= S_APPEND;
2186 else
2187 inode -> i_flags &= ~S_APPEND;
2188 if( sd_attrs & REISERFS_NOATIME_FL )
2189 inode -> i_flags |= S_NOATIME;
2190 else
2191 inode -> i_flags &= ~S_NOATIME;
2192 if( sd_attrs & REISERFS_NOTAIL_FL )
2193 inode->u.reiserfs_i.i_flags |= i_nopack_mask;
2194 else
2195 inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
2196 }
2197 }
2198
2199 void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs )
2200 {
2201 if( reiserfs_attrs( inode -> i_sb ) ) {
2202 if( inode -> i_flags & S_IMMUTABLE )
2203 *sd_attrs |= REISERFS_IMMUTABLE_FL;
2204 else
2205 *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2206 if( inode -> i_flags & S_SYNC )
2207 *sd_attrs |= REISERFS_SYNC_FL;
2208 else
2209 *sd_attrs &= ~REISERFS_SYNC_FL;
2210 if( inode -> i_flags & S_NOATIME )
2211 *sd_attrs |= REISERFS_NOATIME_FL;
2212 else
2213 *sd_attrs &= ~REISERFS_NOATIME_FL;
2214 if( inode->u.reiserfs_i.i_flags & i_nopack_mask )
2215 *sd_attrs |= REISERFS_NOTAIL_FL;
2216 else
2217 *sd_attrs &= ~REISERFS_NOTAIL_FL;
2218 }
2219 }
2220
2221 static int reiserfs_direct_io(int rw, struct inode *inode,
2222 struct kiobuf *iobuf, unsigned long blocknr,
2223 int blocksize)
2224 {
2225 lock_kernel();
2226 reiserfs_commit_for_tail(inode);
2227 unlock_kernel();
2228 return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize,
2229 reiserfs_get_block_direct_io) ;
2230 }
2231
2232 struct address_space_operations reiserfs_address_space_operations = {
2233 writepage: reiserfs_writepage,
2234 readpage: reiserfs_readpage,
2235 sync_page: block_sync_page,
2236 prepare_write: reiserfs_prepare_write,
2237 commit_write: reiserfs_commit_write,
2238 bmap: reiserfs_aop_bmap,
2239 direct_IO: reiserfs_direct_io,
2240 } ;
2241