]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - fs/ocfs2/xattr.c
ocfs2: Provide a wrapper to brelse() xattr bucket buffers.
[mirror_ubuntu-bionic-kernel.git] / fs / ocfs2 / xattr.c
CommitLineData
f56654c4
TM
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * xattr.c
5 *
c3cb6827 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
f56654c4 7 *
cf1d6c76 8 * CREDITS:
c3cb6827
TY
9 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
cf1d6c76 11 *
f56654c4
TM
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public
c3cb6827 14 * License version 2 as published by the Free Software Foundation.
f56654c4
TM
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
f56654c4
TM
20 */
21
cf1d6c76
TY
22#include <linux/capability.h>
23#include <linux/fs.h>
24#include <linux/types.h>
25#include <linux/slab.h>
26#include <linux/highmem.h>
27#include <linux/pagemap.h>
28#include <linux/uio.h>
29#include <linux/sched.h>
30#include <linux/splice.h>
31#include <linux/mount.h>
32#include <linux/writeback.h>
33#include <linux/falloc.h>
01225596 34#include <linux/sort.h>
99219aea
MF
35#include <linux/init.h>
36#include <linux/module.h>
37#include <linux/string.h>
cf1d6c76 38
f56654c4
TM
39#define MLOG_MASK_PREFIX ML_XATTR
40#include <cluster/masklog.h>
41
42#include "ocfs2.h"
43#include "alloc.h"
44#include "dlmglue.h"
45#include "file.h"
cf1d6c76
TY
46#include "symlink.h"
47#include "sysfile.h"
f56654c4
TM
48#include "inode.h"
49#include "journal.h"
50#include "ocfs2_fs.h"
51#include "suballoc.h"
52#include "uptodate.h"
53#include "buffer_head_io.h"
0c044f0b 54#include "super.h"
cf1d6c76
TY
55#include "xattr.h"
56
57
58struct ocfs2_xattr_def_value_root {
59 struct ocfs2_xattr_value_root xv;
60 struct ocfs2_extent_rec er;
61};
62
0c044f0b 63struct ocfs2_xattr_bucket {
4ac6032d 64 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
0c044f0b
TM
65};
66
cf1d6c76
TY
67#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
68#define OCFS2_XATTR_INLINE_SIZE 80
69
70static struct ocfs2_xattr_def_value_root def_xv = {
71 .xv.xr_list.l_count = cpu_to_le16(1),
72};
73
74struct xattr_handler *ocfs2_xattr_handlers[] = {
75 &ocfs2_xattr_user_handler,
76 &ocfs2_xattr_trusted_handler,
77 NULL
78};
79
c988fd04 80static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
cf1d6c76
TY
81 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
82 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
83};
84
85struct ocfs2_xattr_info {
86 int name_index;
87 const char *name;
88 const void *value;
89 size_t value_len;
90};
91
92struct ocfs2_xattr_search {
93 struct buffer_head *inode_bh;
94 /*
95 * xattr_bh point to the block buffer head which has extended attribute
96 * when extended attribute in inode, xattr_bh is equal to inode_bh.
97 */
98 struct buffer_head *xattr_bh;
99 struct ocfs2_xattr_header *header;
589dc260 100 struct ocfs2_xattr_bucket bucket;
cf1d6c76
TY
101 void *base;
102 void *end;
103 struct ocfs2_xattr_entry *here;
104 int not_found;
105};
106
589dc260
TM
107static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
108 struct ocfs2_xattr_header *xh,
109 int index,
110 int *block_off,
111 int *new_offset);
112
54f443f4
JB
113static int ocfs2_xattr_block_find(struct inode *inode,
114 int name_index,
115 const char *name,
116 struct ocfs2_xattr_search *xs);
589dc260
TM
117static int ocfs2_xattr_index_block_find(struct inode *inode,
118 struct buffer_head *root_bh,
119 int name_index,
120 const char *name,
121 struct ocfs2_xattr_search *xs);
122
0c044f0b
TM
123static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
124 struct ocfs2_xattr_tree_root *xt,
125 char *buffer,
126 size_t buffer_size);
127
01225596
TM
128static int ocfs2_xattr_create_index_block(struct inode *inode,
129 struct ocfs2_xattr_search *xs);
130
131static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
132 struct ocfs2_xattr_info *xi,
133 struct ocfs2_xattr_search *xs);
134
a3944256
TM
135static int ocfs2_delete_xattr_index_block(struct inode *inode,
136 struct buffer_head *xb_bh);
137
0030e001
TY
138static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
139{
140 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
141}
142
143static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
144{
145 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
146}
147
148static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
149{
150 u16 len = sb->s_blocksize -
151 offsetof(struct ocfs2_xattr_header, xh_entries);
152
153 return len / sizeof(struct ocfs2_xattr_entry);
154}
155
/* Block number of the first block of bucket _b. */
#define bucket_blkno(_b)	((_b)->bu_bhs[0]->b_blocknr)
/* Data of block _n of bucket _b. */
#define bucket_block(_b, _n)	((_b)->bu_bhs[(_n)]->b_data)
/* The first block of bucket _b viewed as an xattr header. */
#define bucket_xh(_b)		((struct ocfs2_xattr_header *)bucket_block((_b), 0))
9c7759aa 159
6dde41d9
JB
160static void ocfs2_xattr_bucket_relse(struct inode *inode,
161 struct ocfs2_xattr_bucket *bucket)
162{
163 int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
164
165 for (i = 0; i < blks; i++) {
166 brelse(bucket->bu_bhs[i]);
167 bucket->bu_bhs[i] = NULL;
168 }
169}
170
936b8834 171static inline const char *ocfs2_xattr_prefix(int name_index)
cf1d6c76
TY
172{
173 struct xattr_handler *handler = NULL;
174
175 if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
176 handler = ocfs2_xattr_handler_map[name_index];
177
936b8834 178 return handler ? handler->prefix : NULL;
cf1d6c76
TY
179}
180
40daa16a 181static u32 ocfs2_xattr_name_hash(struct inode *inode,
2057e5c6 182 const char *name,
40daa16a 183 int name_len)
cf1d6c76
TY
184{
185 /* Get hash value of uuid from super block */
186 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
187 int i;
188
cf1d6c76
TY
189 /* hash extended attribute name */
190 for (i = 0; i < name_len; i++) {
191 hash = (hash << OCFS2_HASH_SHIFT) ^
192 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
193 *name++;
194 }
195
196 return hash;
197}
198
199/*
200 * ocfs2_xattr_hash_entry()
201 *
202 * Compute the hash of an extended attribute.
203 */
204static void ocfs2_xattr_hash_entry(struct inode *inode,
205 struct ocfs2_xattr_header *header,
206 struct ocfs2_xattr_entry *entry)
207{
208 u32 hash = 0;
cf1d6c76 209 char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
cf1d6c76 210
2057e5c6 211 hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
cf1d6c76
TY
212 entry->xe_name_hash = cpu_to_le32(hash);
213
214 return;
215}
f56654c4
TM
216
217static int ocfs2_xattr_extend_allocation(struct inode *inode,
218 u32 clusters_to_add,
219 struct buffer_head *xattr_bh,
220 struct ocfs2_xattr_value_root *xv)
221{
222 int status = 0;
223 int restart_func = 0;
224 int credits = 0;
225 handle_t *handle = NULL;
226 struct ocfs2_alloc_context *data_ac = NULL;
227 struct ocfs2_alloc_context *meta_ac = NULL;
228 enum ocfs2_alloc_restarted why;
229 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
f56654c4 230 u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
f99b9b7c 231 struct ocfs2_extent_tree et;
f56654c4
TM
232
233 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
234
8d6220d6 235 ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
f99b9b7c 236
f56654c4
TM
237restart_all:
238
f99b9b7c
JB
239 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
240 &data_ac, &meta_ac);
f56654c4
TM
241 if (status) {
242 mlog_errno(status);
243 goto leave;
244 }
245
f99b9b7c
JB
246 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
247 clusters_to_add);
f56654c4
TM
248 handle = ocfs2_start_trans(osb, credits);
249 if (IS_ERR(handle)) {
250 status = PTR_ERR(handle);
251 handle = NULL;
252 mlog_errno(status);
253 goto leave;
254 }
255
256restarted_transaction:
257 status = ocfs2_journal_access(handle, inode, xattr_bh,
258 OCFS2_JOURNAL_ACCESS_WRITE);
259 if (status < 0) {
260 mlog_errno(status);
261 goto leave;
262 }
263
264 prev_clusters = le32_to_cpu(xv->xr_clusters);
265 status = ocfs2_add_clusters_in_btree(osb,
266 inode,
267 &logical_start,
268 clusters_to_add,
269 0,
f99b9b7c 270 &et,
f56654c4
TM
271 handle,
272 data_ac,
273 meta_ac,
f99b9b7c 274 &why);
f56654c4
TM
275 if ((status < 0) && (status != -EAGAIN)) {
276 if (status != -ENOSPC)
277 mlog_errno(status);
278 goto leave;
279 }
280
281 status = ocfs2_journal_dirty(handle, xattr_bh);
282 if (status < 0) {
283 mlog_errno(status);
284 goto leave;
285 }
286
287 clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
288
289 if (why != RESTART_NONE && clusters_to_add) {
290 if (why == RESTART_META) {
291 mlog(0, "restarting function.\n");
292 restart_func = 1;
293 } else {
294 BUG_ON(why != RESTART_TRANS);
295
296 mlog(0, "restarting transaction.\n");
297 /* TODO: This can be more intelligent. */
298 credits = ocfs2_calc_extend_credits(osb->sb,
f99b9b7c 299 et.et_root_el,
f56654c4
TM
300 clusters_to_add);
301 status = ocfs2_extend_trans(handle, credits);
302 if (status < 0) {
303 /* handle still has to be committed at
304 * this point. */
305 status = -ENOMEM;
306 mlog_errno(status);
307 goto leave;
308 }
309 goto restarted_transaction;
310 }
311 }
312
313leave:
314 if (handle) {
315 ocfs2_commit_trans(osb, handle);
316 handle = NULL;
317 }
318 if (data_ac) {
319 ocfs2_free_alloc_context(data_ac);
320 data_ac = NULL;
321 }
322 if (meta_ac) {
323 ocfs2_free_alloc_context(meta_ac);
324 meta_ac = NULL;
325 }
326 if ((!status) && restart_func) {
327 restart_func = 0;
328 goto restart_all;
329 }
330
331 return status;
332}
333
334static int __ocfs2_remove_xattr_range(struct inode *inode,
335 struct buffer_head *root_bh,
336 struct ocfs2_xattr_value_root *xv,
337 u32 cpos, u32 phys_cpos, u32 len,
338 struct ocfs2_cached_dealloc_ctxt *dealloc)
339{
340 int ret;
341 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
342 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
343 struct inode *tl_inode = osb->osb_tl_inode;
344 handle_t *handle;
345 struct ocfs2_alloc_context *meta_ac = NULL;
f99b9b7c
JB
346 struct ocfs2_extent_tree et;
347
8d6220d6 348 ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
f56654c4 349
f99b9b7c 350 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
f56654c4
TM
351 if (ret) {
352 mlog_errno(ret);
353 return ret;
354 }
355
356 mutex_lock(&tl_inode->i_mutex);
357
358 if (ocfs2_truncate_log_needs_flush(osb)) {
359 ret = __ocfs2_flush_truncate_log(osb);
360 if (ret < 0) {
361 mlog_errno(ret);
362 goto out;
363 }
364 }
365
366 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
367 if (IS_ERR(handle)) {
368 ret = PTR_ERR(handle);
369 mlog_errno(ret);
370 goto out;
371 }
372
373 ret = ocfs2_journal_access(handle, inode, root_bh,
374 OCFS2_JOURNAL_ACCESS_WRITE);
375 if (ret) {
376 mlog_errno(ret);
377 goto out_commit;
378 }
379
f99b9b7c
JB
380 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
381 dealloc);
f56654c4
TM
382 if (ret) {
383 mlog_errno(ret);
384 goto out_commit;
385 }
386
387 le32_add_cpu(&xv->xr_clusters, -len);
388
389 ret = ocfs2_journal_dirty(handle, root_bh);
390 if (ret) {
391 mlog_errno(ret);
392 goto out_commit;
393 }
394
395 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
396 if (ret)
397 mlog_errno(ret);
398
399out_commit:
400 ocfs2_commit_trans(osb, handle);
401out:
402 mutex_unlock(&tl_inode->i_mutex);
403
404 if (meta_ac)
405 ocfs2_free_alloc_context(meta_ac);
406
407 return ret;
408}
409
410static int ocfs2_xattr_shrink_size(struct inode *inode,
411 u32 old_clusters,
412 u32 new_clusters,
413 struct buffer_head *root_bh,
414 struct ocfs2_xattr_value_root *xv)
415{
416 int ret = 0;
417 u32 trunc_len, cpos, phys_cpos, alloc_size;
418 u64 block;
419 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
420 struct ocfs2_cached_dealloc_ctxt dealloc;
421
422 ocfs2_init_dealloc_ctxt(&dealloc);
423
424 if (old_clusters <= new_clusters)
425 return 0;
426
427 cpos = new_clusters;
428 trunc_len = old_clusters - new_clusters;
429 while (trunc_len) {
430 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
431 &alloc_size, &xv->xr_list);
432 if (ret) {
433 mlog_errno(ret);
434 goto out;
435 }
436
437 if (alloc_size > trunc_len)
438 alloc_size = trunc_len;
439
440 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
441 phys_cpos, alloc_size,
442 &dealloc);
443 if (ret) {
444 mlog_errno(ret);
445 goto out;
446 }
447
448 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
449 ocfs2_remove_xattr_clusters_from_cache(inode, block,
450 alloc_size);
451 cpos += alloc_size;
452 trunc_len -= alloc_size;
453 }
454
455out:
456 ocfs2_schedule_truncate_log_flush(osb, 1);
457 ocfs2_run_deallocs(osb, &dealloc);
458
459 return ret;
460}
461
462static int ocfs2_xattr_value_truncate(struct inode *inode,
463 struct buffer_head *root_bh,
464 struct ocfs2_xattr_value_root *xv,
465 int len)
466{
467 int ret;
468 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
469 u32 old_clusters = le32_to_cpu(xv->xr_clusters);
470
471 if (new_clusters == old_clusters)
472 return 0;
473
474 if (new_clusters > old_clusters)
475 ret = ocfs2_xattr_extend_allocation(inode,
476 new_clusters - old_clusters,
477 root_bh, xv);
478 else
479 ret = ocfs2_xattr_shrink_size(inode,
480 old_clusters, new_clusters,
481 root_bh, xv);
482
483 return ret;
484}
cf1d6c76 485
936b8834
TM
/*
 * Append "<prefix><name>\0" to the listing kept in @buffer.
 *
 * @result is the running length of the listing and is always advanced
 * by the size of this entry, even when nothing is copied.  When @size
 * is 0 the caller only wants to learn the required length, so nothing
 * is written.  Returns -ERANGE if the entry does not fit into @size
 * bytes and 0 otherwise.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;
	char *dst;

	*result = start + total_len;

	/* we are just looking for how big our buffer needs to be */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	memcpy(dst + prefix_len, name, name_len);
	dst[prefix_len + name_len] = '\0';

	return 0;
}
509
cf1d6c76
TY
510static int ocfs2_xattr_list_entries(struct inode *inode,
511 struct ocfs2_xattr_header *header,
512 char *buffer, size_t buffer_size)
513{
936b8834
TM
514 size_t result = 0;
515 int i, type, ret;
516 const char *prefix, *name;
cf1d6c76
TY
517
518 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
519 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
936b8834
TM
520 type = ocfs2_xattr_get_type(entry);
521 prefix = ocfs2_xattr_prefix(type);
522
523 if (prefix) {
524 name = (const char *)header +
525 le16_to_cpu(entry->xe_name_offset);
526
527 ret = ocfs2_xattr_list_entry(buffer, buffer_size,
528 &result, prefix, name,
529 entry->xe_name_len);
530 if (ret)
531 return ret;
cf1d6c76
TY
532 }
533 }
534
936b8834 535 return result;
cf1d6c76
TY
536}
537
538static int ocfs2_xattr_ibody_list(struct inode *inode,
539 struct ocfs2_dinode *di,
540 char *buffer,
541 size_t buffer_size)
542{
543 struct ocfs2_xattr_header *header = NULL;
544 struct ocfs2_inode_info *oi = OCFS2_I(inode);
545 int ret = 0;
546
547 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
548 return ret;
549
550 header = (struct ocfs2_xattr_header *)
551 ((void *)di + inode->i_sb->s_blocksize -
552 le16_to_cpu(di->i_xattr_inline_size));
553
554 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
555
556 return ret;
557}
558
559static int ocfs2_xattr_block_list(struct inode *inode,
560 struct ocfs2_dinode *di,
561 char *buffer,
562 size_t buffer_size)
563{
564 struct buffer_head *blk_bh = NULL;
0c044f0b 565 struct ocfs2_xattr_block *xb;
cf1d6c76
TY
566 int ret = 0;
567
568 if (!di->i_xattr_loc)
569 return ret;
570
0fcaa56a 571 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
cf1d6c76
TY
572 if (ret < 0) {
573 mlog_errno(ret);
574 return ret;
575 }
cf1d6c76 576
0c044f0b 577 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
f6087fb7
JB
578 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
579 ret = -EIO;
580 goto cleanup;
581 }
cf1d6c76 582
0c044f0b
TM
583 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
584 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
585 ret = ocfs2_xattr_list_entries(inode, header,
586 buffer, buffer_size);
587 } else {
588 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
589 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
590 buffer, buffer_size);
591 }
cf1d6c76
TY
592cleanup:
593 brelse(blk_bh);
594
595 return ret;
596}
597
598ssize_t ocfs2_listxattr(struct dentry *dentry,
599 char *buffer,
600 size_t size)
601{
602 int ret = 0, i_ret = 0, b_ret = 0;
603 struct buffer_head *di_bh = NULL;
604 struct ocfs2_dinode *di = NULL;
605 struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
606
8154da3d
TY
607 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
608 return -EOPNOTSUPP;
609
cf1d6c76
TY
610 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
611 return ret;
612
613 ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
614 if (ret < 0) {
615 mlog_errno(ret);
616 return ret;
617 }
618
619 di = (struct ocfs2_dinode *)di_bh->b_data;
620
621 down_read(&oi->ip_xattr_sem);
622 i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
623 if (i_ret < 0)
624 b_ret = 0;
625 else {
626 if (buffer) {
627 buffer += i_ret;
628 size -= i_ret;
629 }
630 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
631 buffer, size);
632 if (b_ret < 0)
633 i_ret = 0;
634 }
635 up_read(&oi->ip_xattr_sem);
636 ocfs2_inode_unlock(dentry->d_inode, 0);
637
638 brelse(di_bh);
639
640 return i_ret + b_ret;
641}
642
643static int ocfs2_xattr_find_entry(int name_index,
644 const char *name,
645 struct ocfs2_xattr_search *xs)
646{
647 struct ocfs2_xattr_entry *entry;
648 size_t name_len;
649 int i, cmp = 1;
650
651 if (name == NULL)
652 return -EINVAL;
653
654 name_len = strlen(name);
655 entry = xs->here;
656 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
657 cmp = name_index - ocfs2_xattr_get_type(entry);
658 if (!cmp)
659 cmp = name_len - entry->xe_name_len;
660 if (!cmp)
661 cmp = memcmp(name, (xs->base +
662 le16_to_cpu(entry->xe_name_offset)),
663 name_len);
664 if (cmp == 0)
665 break;
666 entry += 1;
667 }
668 xs->here = entry;
669
670 return cmp ? -ENODATA : 0;
671}
672
673static int ocfs2_xattr_get_value_outside(struct inode *inode,
589dc260 674 struct ocfs2_xattr_value_root *xv,
cf1d6c76
TY
675 void *buffer,
676 size_t len)
677{
678 u32 cpos, p_cluster, num_clusters, bpc, clusters;
679 u64 blkno;
680 int i, ret = 0;
681 size_t cplen, blocksize;
682 struct buffer_head *bh = NULL;
cf1d6c76
TY
683 struct ocfs2_extent_list *el;
684
cf1d6c76
TY
685 el = &xv->xr_list;
686 clusters = le32_to_cpu(xv->xr_clusters);
687 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
688 blocksize = inode->i_sb->s_blocksize;
689
690 cpos = 0;
691 while (cpos < clusters) {
692 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
693 &num_clusters, el);
694 if (ret) {
695 mlog_errno(ret);
696 goto out;
697 }
698
699 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
700 /* Copy ocfs2_xattr_value */
701 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
0fcaa56a 702 ret = ocfs2_read_block(inode, blkno, &bh);
cf1d6c76
TY
703 if (ret) {
704 mlog_errno(ret);
705 goto out;
706 }
707
708 cplen = len >= blocksize ? blocksize : len;
709 memcpy(buffer, bh->b_data, cplen);
710 len -= cplen;
711 buffer += cplen;
712
713 brelse(bh);
714 bh = NULL;
715 if (len == 0)
716 break;
717 }
718 cpos += num_clusters;
719 }
720out:
721 return ret;
722}
723
724static int ocfs2_xattr_ibody_get(struct inode *inode,
725 int name_index,
726 const char *name,
727 void *buffer,
728 size_t buffer_size,
729 struct ocfs2_xattr_search *xs)
730{
731 struct ocfs2_inode_info *oi = OCFS2_I(inode);
732 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
589dc260 733 struct ocfs2_xattr_value_root *xv;
cf1d6c76
TY
734 size_t size;
735 int ret = 0;
736
737 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
738 return -ENODATA;
739
740 xs->end = (void *)di + inode->i_sb->s_blocksize;
741 xs->header = (struct ocfs2_xattr_header *)
742 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
743 xs->base = (void *)xs->header;
744 xs->here = xs->header->xh_entries;
745
746 ret = ocfs2_xattr_find_entry(name_index, name, xs);
747 if (ret)
748 return ret;
749 size = le64_to_cpu(xs->here->xe_value_size);
750 if (buffer) {
751 if (size > buffer_size)
752 return -ERANGE;
753 if (ocfs2_xattr_is_local(xs->here)) {
754 memcpy(buffer, (void *)xs->base +
755 le16_to_cpu(xs->here->xe_name_offset) +
756 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
757 } else {
589dc260
TM
758 xv = (struct ocfs2_xattr_value_root *)
759 (xs->base + le16_to_cpu(
760 xs->here->xe_name_offset) +
761 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
762 ret = ocfs2_xattr_get_value_outside(inode, xv,
cf1d6c76
TY
763 buffer, size);
764 if (ret < 0) {
765 mlog_errno(ret);
766 return ret;
767 }
768 }
769 }
770
771 return size;
772}
773
774static int ocfs2_xattr_block_get(struct inode *inode,
775 int name_index,
776 const char *name,
777 void *buffer,
778 size_t buffer_size,
779 struct ocfs2_xattr_search *xs)
780{
cf1d6c76 781 struct ocfs2_xattr_block *xb;
589dc260 782 struct ocfs2_xattr_value_root *xv;
cf1d6c76 783 size_t size;
589dc260 784 int ret = -ENODATA, name_offset, name_len, block_off, i;
cf1d6c76 785
589dc260
TM
786 memset(&xs->bucket, 0, sizeof(xs->bucket));
787
54f443f4
JB
788 ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
789 if (ret) {
cf1d6c76 790 mlog_errno(ret);
cf1d6c76
TY
791 goto cleanup;
792 }
793
6c1e183e
TY
794 if (xs->not_found) {
795 ret = -ENODATA;
796 goto cleanup;
797 }
798
54f443f4 799 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
cf1d6c76
TY
800 size = le64_to_cpu(xs->here->xe_value_size);
801 if (buffer) {
802 ret = -ERANGE;
803 if (size > buffer_size)
804 goto cleanup;
589dc260
TM
805
806 name_offset = le16_to_cpu(xs->here->xe_name_offset);
807 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
808 i = xs->here - xs->header->xh_entries;
809
810 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
811 ret = ocfs2_xattr_bucket_get_name_value(inode,
3e632946 812 bucket_xh(&xs->bucket),
589dc260
TM
813 i,
814 &block_off,
815 &name_offset);
51def39f 816 xs->base = bucket_block(&xs->bucket, block_off);
589dc260 817 }
cf1d6c76
TY
818 if (ocfs2_xattr_is_local(xs->here)) {
819 memcpy(buffer, (void *)xs->base +
589dc260 820 name_offset + name_len, size);
cf1d6c76 821 } else {
589dc260
TM
822 xv = (struct ocfs2_xattr_value_root *)
823 (xs->base + name_offset + name_len);
824 ret = ocfs2_xattr_get_value_outside(inode, xv,
cf1d6c76
TY
825 buffer, size);
826 if (ret < 0) {
827 mlog_errno(ret);
828 goto cleanup;
829 }
830 }
831 }
832 ret = size;
833cleanup:
6dde41d9 834 ocfs2_xattr_bucket_relse(inode, &xs->bucket);
589dc260 835 memset(&xs->bucket, 0, sizeof(xs->bucket));
cf1d6c76 836
54f443f4
JB
837 brelse(xs->xattr_bh);
838 xs->xattr_bh = NULL;
cf1d6c76
TY
839 return ret;
840}
841
842/* ocfs2_xattr_get()
843 *
844 * Copy an extended attribute into the buffer provided.
845 * Buffer is NULL to compute the size of buffer required.
846 */
0030e001
TY
847static int ocfs2_xattr_get(struct inode *inode,
848 int name_index,
849 const char *name,
850 void *buffer,
851 size_t buffer_size)
cf1d6c76
TY
852{
853 int ret;
854 struct ocfs2_dinode *di = NULL;
855 struct buffer_head *di_bh = NULL;
856 struct ocfs2_inode_info *oi = OCFS2_I(inode);
857 struct ocfs2_xattr_search xis = {
858 .not_found = -ENODATA,
859 };
860 struct ocfs2_xattr_search xbs = {
861 .not_found = -ENODATA,
862 };
863
8154da3d
TY
864 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
865 return -EOPNOTSUPP;
866
cf1d6c76
TY
867 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
868 ret = -ENODATA;
869
870 ret = ocfs2_inode_lock(inode, &di_bh, 0);
871 if (ret < 0) {
872 mlog_errno(ret);
873 return ret;
874 }
875 xis.inode_bh = xbs.inode_bh = di_bh;
876 di = (struct ocfs2_dinode *)di_bh->b_data;
877
878 down_read(&oi->ip_xattr_sem);
879 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
880 buffer_size, &xis);
6c1e183e 881 if (ret == -ENODATA && di->i_xattr_loc)
cf1d6c76
TY
882 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
883 buffer_size, &xbs);
884 up_read(&oi->ip_xattr_sem);
885 ocfs2_inode_unlock(inode, 0);
886
887 brelse(di_bh);
888
889 return ret;
890}
891
892static int __ocfs2_xattr_set_value_outside(struct inode *inode,
893 struct ocfs2_xattr_value_root *xv,
894 const void *value,
895 int value_len)
896{
897 int ret = 0, i, cp_len, credits;
898 u16 blocksize = inode->i_sb->s_blocksize;
899 u32 p_cluster, num_clusters;
900 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
901 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
902 u64 blkno;
903 struct buffer_head *bh = NULL;
904 handle_t *handle;
905
906 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
907
908 credits = clusters * bpc;
909 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
910 if (IS_ERR(handle)) {
911 ret = PTR_ERR(handle);
912 mlog_errno(ret);
913 goto out;
914 }
915
916 while (cpos < clusters) {
917 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
918 &num_clusters, &xv->xr_list);
919 if (ret) {
920 mlog_errno(ret);
921 goto out_commit;
922 }
923
924 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
925
926 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
0fcaa56a 927 ret = ocfs2_read_block(inode, blkno, &bh);
cf1d6c76
TY
928 if (ret) {
929 mlog_errno(ret);
930 goto out_commit;
931 }
932
933 ret = ocfs2_journal_access(handle,
934 inode,
935 bh,
936 OCFS2_JOURNAL_ACCESS_WRITE);
937 if (ret < 0) {
938 mlog_errno(ret);
939 goto out_commit;
940 }
941
942 cp_len = value_len > blocksize ? blocksize : value_len;
943 memcpy(bh->b_data, value, cp_len);
944 value_len -= cp_len;
945 value += cp_len;
946 if (cp_len < blocksize)
947 memset(bh->b_data + cp_len, 0,
948 blocksize - cp_len);
949
950 ret = ocfs2_journal_dirty(handle, bh);
951 if (ret < 0) {
952 mlog_errno(ret);
953 goto out_commit;
954 }
955 brelse(bh);
956 bh = NULL;
957
958 /*
959 * XXX: do we need to empty all the following
960 * blocks in this cluster?
961 */
962 if (!value_len)
963 break;
964 }
965 cpos += num_clusters;
966 }
967out_commit:
968 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
969out:
970 brelse(bh);
971
972 return ret;
973}
974
975static int ocfs2_xattr_cleanup(struct inode *inode,
976 struct ocfs2_xattr_info *xi,
977 struct ocfs2_xattr_search *xs,
978 size_t offs)
979{
980 handle_t *handle = NULL;
981 int ret = 0;
982 size_t name_len = strlen(xi->name);
983 void *val = xs->base + offs;
984 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
985
986 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
987 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
988 if (IS_ERR(handle)) {
989 ret = PTR_ERR(handle);
990 mlog_errno(ret);
991 goto out;
992 }
993 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
994 OCFS2_JOURNAL_ACCESS_WRITE);
995 if (ret) {
996 mlog_errno(ret);
997 goto out_commit;
998 }
999 /* Decrease xattr count */
1000 le16_add_cpu(&xs->header->xh_count, -1);
1001 /* Remove the xattr entry and tree root which has already be set*/
1002 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1003 memset(val, 0, size);
1004
1005 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1006 if (ret < 0)
1007 mlog_errno(ret);
1008out_commit:
1009 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1010out:
1011 return ret;
1012}
1013
1014static int ocfs2_xattr_update_entry(struct inode *inode,
1015 struct ocfs2_xattr_info *xi,
1016 struct ocfs2_xattr_search *xs,
1017 size_t offs)
1018{
1019 handle_t *handle = NULL;
1020 int ret = 0;
1021
1022 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1023 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1024 if (IS_ERR(handle)) {
1025 ret = PTR_ERR(handle);
1026 mlog_errno(ret);
1027 goto out;
1028 }
1029 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1030 OCFS2_JOURNAL_ACCESS_WRITE);
1031 if (ret) {
1032 mlog_errno(ret);
1033 goto out_commit;
1034 }
1035
1036 xs->here->xe_name_offset = cpu_to_le16(offs);
1037 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1038 if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1039 ocfs2_xattr_set_local(xs->here, 1);
1040 else
1041 ocfs2_xattr_set_local(xs->here, 0);
1042 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1043
1044 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1045 if (ret < 0)
1046 mlog_errno(ret);
1047out_commit:
1048 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1049out:
1050 return ret;
1051}
1052
1053/*
1054 * ocfs2_xattr_set_value_outside()
1055 *
1056 * Set large size value in B tree.
1057 */
1058static int ocfs2_xattr_set_value_outside(struct inode *inode,
1059 struct ocfs2_xattr_info *xi,
1060 struct ocfs2_xattr_search *xs,
1061 size_t offs)
1062{
1063 size_t name_len = strlen(xi->name);
1064 void *val = xs->base + offs;
1065 struct ocfs2_xattr_value_root *xv = NULL;
1066 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1067 int ret = 0;
1068
1069 memset(val, 0, size);
1070 memcpy(val, xi->name, name_len);
1071 xv = (struct ocfs2_xattr_value_root *)
1072 (val + OCFS2_XATTR_SIZE(name_len));
1073 xv->xr_clusters = 0;
1074 xv->xr_last_eb_blk = 0;
1075 xv->xr_list.l_tree_depth = 0;
1076 xv->xr_list.l_count = cpu_to_le16(1);
1077 xv->xr_list.l_next_free_rec = 0;
1078
1079 ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1080 xi->value_len);
1081 if (ret < 0) {
1082 mlog_errno(ret);
1083 return ret;
1084 }
1085 ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
1086 xi->value_len);
1087 if (ret < 0) {
1088 mlog_errno(ret);
1089 return ret;
1090 }
1091 ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
1092 if (ret < 0)
1093 mlog_errno(ret);
1094
1095 return ret;
1096}
1097
1098/*
1099 * ocfs2_xattr_set_entry_local()
1100 *
1101 * Set, replace or remove extended attribute in local.
1102 */
1103static void ocfs2_xattr_set_entry_local(struct inode *inode,
1104 struct ocfs2_xattr_info *xi,
1105 struct ocfs2_xattr_search *xs,
1106 struct ocfs2_xattr_entry *last,
1107 size_t min_offs)
1108{
1109 size_t name_len = strlen(xi->name);
1110 int i;
1111
1112 if (xi->value && xs->not_found) {
1113 /* Insert the new xattr entry. */
1114 le16_add_cpu(&xs->header->xh_count, 1);
1115 ocfs2_xattr_set_type(last, xi->name_index);
1116 ocfs2_xattr_set_local(last, 1);
1117 last->xe_name_len = name_len;
1118 } else {
1119 void *first_val;
1120 void *val;
1121 size_t offs, size;
1122
1123 first_val = xs->base + min_offs;
1124 offs = le16_to_cpu(xs->here->xe_name_offset);
1125 val = xs->base + offs;
1126
1127 if (le64_to_cpu(xs->here->xe_value_size) >
1128 OCFS2_XATTR_INLINE_SIZE)
1129 size = OCFS2_XATTR_SIZE(name_len) +
1130 OCFS2_XATTR_ROOT_SIZE;
1131 else
1132 size = OCFS2_XATTR_SIZE(name_len) +
1133 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1134
1135 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1136 OCFS2_XATTR_SIZE(xi->value_len)) {
1137 /* The old and the new value have the
1138 same size. Just replace the value. */
1139 ocfs2_xattr_set_local(xs->here, 1);
1140 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1141 /* Clear value bytes. */
1142 memset(val + OCFS2_XATTR_SIZE(name_len),
1143 0,
1144 OCFS2_XATTR_SIZE(xi->value_len));
1145 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1146 xi->value,
1147 xi->value_len);
1148 return;
1149 }
1150 /* Remove the old name+value. */
1151 memmove(first_val + size, first_val, val - first_val);
1152 memset(first_val, 0, size);
1153 xs->here->xe_name_hash = 0;
1154 xs->here->xe_name_offset = 0;
1155 ocfs2_xattr_set_local(xs->here, 1);
1156 xs->here->xe_value_size = 0;
1157
1158 min_offs += size;
1159
1160 /* Adjust all value offsets. */
1161 last = xs->header->xh_entries;
1162 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1163 size_t o = le16_to_cpu(last->xe_name_offset);
1164
1165 if (o < offs)
1166 last->xe_name_offset = cpu_to_le16(o + size);
1167 last += 1;
1168 }
1169
1170 if (!xi->value) {
1171 /* Remove the old entry. */
1172 last -= 1;
1173 memmove(xs->here, xs->here + 1,
1174 (void *)last - (void *)xs->here);
1175 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1176 le16_add_cpu(&xs->header->xh_count, -1);
1177 }
1178 }
1179 if (xi->value) {
1180 /* Insert the new name+value. */
1181 size_t size = OCFS2_XATTR_SIZE(name_len) +
1182 OCFS2_XATTR_SIZE(xi->value_len);
1183 void *val = xs->base + min_offs - size;
1184
1185 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1186 memset(val, 0, size);
1187 memcpy(val, xi->name, name_len);
1188 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1189 xi->value,
1190 xi->value_len);
1191 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1192 ocfs2_xattr_set_local(xs->here, 1);
1193 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1194 }
1195
1196 return;
1197}
1198
1199/*
1200 * ocfs2_xattr_set_entry()
1201 *
1202 * Set extended attribute entry into inode or block.
1203 *
1204 * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1205 * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1206 * then set value in B tree with set_value_outside().
1207 */
1208static int ocfs2_xattr_set_entry(struct inode *inode,
1209 struct ocfs2_xattr_info *xi,
1210 struct ocfs2_xattr_search *xs,
1211 int flag)
1212{
1213 struct ocfs2_xattr_entry *last;
1214 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1215 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1216 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1217 size_t size_l = 0;
1218 handle_t *handle = NULL;
1219 int free, i, ret;
1220 struct ocfs2_xattr_info xi_l = {
1221 .name_index = xi->name_index,
1222 .name = xi->name,
1223 .value = xi->value,
1224 .value_len = xi->value_len,
1225 };
1226
1227 /* Compute min_offs, last and free space. */
1228 last = xs->header->xh_entries;
1229
1230 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1231 size_t offs = le16_to_cpu(last->xe_name_offset);
1232 if (offs < min_offs)
1233 min_offs = offs;
1234 last += 1;
1235 }
1236
1237 free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1238 if (free < 0)
b37c4d84 1239 return -EIO;
cf1d6c76
TY
1240
1241 if (!xs->not_found) {
1242 size_t size = 0;
1243 if (ocfs2_xattr_is_local(xs->here))
1244 size = OCFS2_XATTR_SIZE(name_len) +
1245 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1246 else
1247 size = OCFS2_XATTR_SIZE(name_len) +
1248 OCFS2_XATTR_ROOT_SIZE;
1249 free += (size + sizeof(struct ocfs2_xattr_entry));
1250 }
1251 /* Check free space in inode or block */
1252 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1253 if (free < sizeof(struct ocfs2_xattr_entry) +
1254 OCFS2_XATTR_SIZE(name_len) +
1255 OCFS2_XATTR_ROOT_SIZE) {
1256 ret = -ENOSPC;
1257 goto out;
1258 }
1259 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1260 xi_l.value = (void *)&def_xv;
1261 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1262 } else if (xi->value) {
1263 if (free < sizeof(struct ocfs2_xattr_entry) +
1264 OCFS2_XATTR_SIZE(name_len) +
1265 OCFS2_XATTR_SIZE(xi->value_len)) {
1266 ret = -ENOSPC;
1267 goto out;
1268 }
1269 }
1270
1271 if (!xs->not_found) {
1272 /* For existing extended attribute */
1273 size_t size = OCFS2_XATTR_SIZE(name_len) +
1274 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1275 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1276 void *val = xs->base + offs;
1277
1278 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1279 /* Replace existing local xattr with tree root */
1280 ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1281 offs);
1282 if (ret < 0)
1283 mlog_errno(ret);
1284 goto out;
1285 } else if (!ocfs2_xattr_is_local(xs->here)) {
1286 /* For existing xattr which has value outside */
1287 struct ocfs2_xattr_value_root *xv = NULL;
1288 xv = (struct ocfs2_xattr_value_root *)(val +
1289 OCFS2_XATTR_SIZE(name_len));
1290
1291 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1292 /*
1293 * If new value need set outside also,
1294 * first truncate old value to new value,
1295 * then set new value with set_value_outside().
1296 */
1297 ret = ocfs2_xattr_value_truncate(inode,
1298 xs->xattr_bh,
1299 xv,
1300 xi->value_len);
1301 if (ret < 0) {
1302 mlog_errno(ret);
1303 goto out;
1304 }
1305
1306 ret = __ocfs2_xattr_set_value_outside(inode,
1307 xv,
1308 xi->value,
1309 xi->value_len);
1310 if (ret < 0) {
1311 mlog_errno(ret);
1312 goto out;
1313 }
1314
1315 ret = ocfs2_xattr_update_entry(inode,
1316 xi,
1317 xs,
1318 offs);
1319 if (ret < 0)
1320 mlog_errno(ret);
1321 goto out;
1322 } else {
1323 /*
1324 * If new value need set in local,
1325 * just trucate old value to zero.
1326 */
1327 ret = ocfs2_xattr_value_truncate(inode,
1328 xs->xattr_bh,
1329 xv,
1330 0);
1331 if (ret < 0)
1332 mlog_errno(ret);
1333 }
1334 }
1335 }
1336
1337 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1338 OCFS2_INODE_UPDATE_CREDITS);
1339 if (IS_ERR(handle)) {
1340 ret = PTR_ERR(handle);
1341 mlog_errno(ret);
1342 goto out;
1343 }
1344
1345 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1346 OCFS2_JOURNAL_ACCESS_WRITE);
1347 if (ret) {
1348 mlog_errno(ret);
1349 goto out_commit;
1350 }
1351
1352 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
28b8ca0b 1353 /* set extended attribute in external block. */
cf1d6c76 1354 ret = ocfs2_extend_trans(handle,
28b8ca0b 1355 OCFS2_INODE_UPDATE_CREDITS +
cf1d6c76
TY
1356 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1357 if (ret) {
1358 mlog_errno(ret);
1359 goto out_commit;
1360 }
1361 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1362 OCFS2_JOURNAL_ACCESS_WRITE);
1363 if (ret) {
1364 mlog_errno(ret);
1365 goto out_commit;
1366 }
1367 }
1368
1369 /*
1370 * Set value in local, include set tree root in local.
1371 * This is the first step for value size >INLINE_SIZE.
1372 */
1373 ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1374
1375 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1376 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1377 if (ret < 0) {
1378 mlog_errno(ret);
1379 goto out_commit;
1380 }
1381 }
1382
1383 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1384 (flag & OCFS2_INLINE_XATTR_FL)) {
1385 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1386 unsigned int xattrsize = osb->s_xattr_inline_size;
1387
1388 /*
1389 * Adjust extent record count or inline data size
1390 * to reserve space for extended attribute.
1391 */
1392 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1393 struct ocfs2_inline_data *idata = &di->id2.i_data;
1394 le16_add_cpu(&idata->id_count, -xattrsize);
1395 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1396 struct ocfs2_extent_list *el = &di->id2.i_list;
1397 le16_add_cpu(&el->l_count, -(xattrsize /
1398 sizeof(struct ocfs2_extent_rec)));
1399 }
1400 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1401 }
1402 /* Update xattr flag */
1403 spin_lock(&oi->ip_lock);
1404 oi->ip_dyn_features |= flag;
1405 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1406 spin_unlock(&oi->ip_lock);
1407 /* Update inode ctime */
1408 inode->i_ctime = CURRENT_TIME;
1409 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1410 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1411
1412 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1413 if (ret < 0)
1414 mlog_errno(ret);
1415
1416out_commit:
1417 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1418
1419 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1420 /*
1421 * Set value outside in B tree.
1422 * This is the second step for value size > INLINE_SIZE.
1423 */
1424 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1425 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
1426 if (ret < 0) {
1427 int ret2;
1428
1429 mlog_errno(ret);
1430 /*
1431 * If set value outside failed, we have to clean
1432 * the junk tree root we have already set in local.
1433 */
1434 ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
1435 if (ret2 < 0)
1436 mlog_errno(ret2);
1437 }
1438 }
1439out:
1440 return ret;
1441
1442}
1443
cf1d6c76
TY
1444static int ocfs2_remove_value_outside(struct inode*inode,
1445 struct buffer_head *bh,
1446 struct ocfs2_xattr_header *header)
1447{
1448 int ret = 0, i;
1449
1450 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1451 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1452
1453 if (!ocfs2_xattr_is_local(entry)) {
1454 struct ocfs2_xattr_value_root *xv;
1455 void *val;
1456
1457 val = (void *)header +
1458 le16_to_cpu(entry->xe_name_offset);
1459 xv = (struct ocfs2_xattr_value_root *)
1460 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1461 ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
1462 if (ret < 0) {
1463 mlog_errno(ret);
1464 return ret;
1465 }
1466 }
1467 }
1468
1469 return ret;
1470}
1471
1472static int ocfs2_xattr_ibody_remove(struct inode *inode,
1473 struct buffer_head *di_bh)
1474{
1475
1476 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1477 struct ocfs2_xattr_header *header;
1478 int ret;
1479
1480 header = (struct ocfs2_xattr_header *)
1481 ((void *)di + inode->i_sb->s_blocksize -
1482 le16_to_cpu(di->i_xattr_inline_size));
1483
1484 ret = ocfs2_remove_value_outside(inode, di_bh, header);
1485
1486 return ret;
1487}
1488
1489static int ocfs2_xattr_block_remove(struct inode *inode,
1490 struct buffer_head *blk_bh)
1491{
1492 struct ocfs2_xattr_block *xb;
cf1d6c76
TY
1493 int ret = 0;
1494
1495 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
a3944256
TM
1496 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1497 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1498 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1499 } else
1500 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
cf1d6c76
TY
1501
1502 return ret;
1503}
1504
08413899
TM
1505static int ocfs2_xattr_free_block(struct inode *inode,
1506 u64 block)
1507{
1508 struct inode *xb_alloc_inode;
1509 struct buffer_head *xb_alloc_bh = NULL;
1510 struct buffer_head *blk_bh = NULL;
1511 struct ocfs2_xattr_block *xb;
1512 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1513 handle_t *handle;
1514 int ret = 0;
1515 u64 blk, bg_blkno;
1516 u16 bit;
1517
0fcaa56a 1518 ret = ocfs2_read_block(inode, block, &blk_bh);
08413899
TM
1519 if (ret < 0) {
1520 mlog_errno(ret);
1521 goto out;
1522 }
1523
f6087fb7
JB
1524 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1525 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1526 ret = -EIO;
08413899
TM
1527 goto out;
1528 }
1529
1530 ret = ocfs2_xattr_block_remove(inode, blk_bh);
1531 if (ret < 0) {
1532 mlog_errno(ret);
1533 goto out;
1534 }
1535
08413899
TM
1536 blk = le64_to_cpu(xb->xb_blkno);
1537 bit = le16_to_cpu(xb->xb_suballoc_bit);
1538 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1539
1540 xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1541 EXTENT_ALLOC_SYSTEM_INODE,
1542 le16_to_cpu(xb->xb_suballoc_slot));
1543 if (!xb_alloc_inode) {
1544 ret = -ENOMEM;
1545 mlog_errno(ret);
1546 goto out;
1547 }
1548 mutex_lock(&xb_alloc_inode->i_mutex);
1549
1550 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1551 if (ret < 0) {
1552 mlog_errno(ret);
1553 goto out_mutex;
1554 }
1555
1556 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1557 if (IS_ERR(handle)) {
1558 ret = PTR_ERR(handle);
1559 mlog_errno(ret);
1560 goto out_unlock;
1561 }
1562
1563 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1564 bit, bg_blkno, 1);
1565 if (ret < 0)
1566 mlog_errno(ret);
1567
1568 ocfs2_commit_trans(osb, handle);
1569out_unlock:
1570 ocfs2_inode_unlock(xb_alloc_inode, 1);
1571 brelse(xb_alloc_bh);
1572out_mutex:
1573 mutex_unlock(&xb_alloc_inode->i_mutex);
1574 iput(xb_alloc_inode);
1575out:
1576 brelse(blk_bh);
1577 return ret;
1578}
1579
cf1d6c76
TY
1580/*
1581 * ocfs2_xattr_remove()
1582 *
1583 * Free extended attribute resources associated with this inode.
1584 */
1585int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1586{
cf1d6c76
TY
1587 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1588 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1589 handle_t *handle;
1590 int ret;
1591
8154da3d
TY
1592 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1593 return 0;
1594
cf1d6c76
TY
1595 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1596 return 0;
1597
1598 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1599 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1600 if (ret < 0) {
1601 mlog_errno(ret);
1602 goto out;
1603 }
1604 }
cf1d6c76 1605
08413899
TM
1606 if (di->i_xattr_loc) {
1607 ret = ocfs2_xattr_free_block(inode,
1608 le64_to_cpu(di->i_xattr_loc));
cf1d6c76
TY
1609 if (ret < 0) {
1610 mlog_errno(ret);
1611 goto out;
1612 }
1613 }
1614
1615 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1616 OCFS2_INODE_UPDATE_CREDITS);
1617 if (IS_ERR(handle)) {
1618 ret = PTR_ERR(handle);
1619 mlog_errno(ret);
1620 goto out;
1621 }
1622 ret = ocfs2_journal_access(handle, inode, di_bh,
1623 OCFS2_JOURNAL_ACCESS_WRITE);
1624 if (ret) {
1625 mlog_errno(ret);
1626 goto out_commit;
1627 }
1628
08413899 1629 di->i_xattr_loc = 0;
cf1d6c76
TY
1630
1631 spin_lock(&oi->ip_lock);
1632 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1633 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1634 spin_unlock(&oi->ip_lock);
1635
1636 ret = ocfs2_journal_dirty(handle, di_bh);
1637 if (ret < 0)
1638 mlog_errno(ret);
1639out_commit:
1640 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1641out:
cf1d6c76
TY
1642 return ret;
1643}
1644
1645static int ocfs2_xattr_has_space_inline(struct inode *inode,
1646 struct ocfs2_dinode *di)
1647{
1648 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1649 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1650 int free;
1651
1652 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1653 return 0;
1654
1655 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1656 struct ocfs2_inline_data *idata = &di->id2.i_data;
1657 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1658 } else if (ocfs2_inode_is_fast_symlink(inode)) {
1659 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1660 le64_to_cpu(di->i_size);
1661 } else {
1662 struct ocfs2_extent_list *el = &di->id2.i_list;
1663 free = (le16_to_cpu(el->l_count) -
1664 le16_to_cpu(el->l_next_free_rec)) *
1665 sizeof(struct ocfs2_extent_rec);
1666 }
1667 if (free >= xattrsize)
1668 return 1;
1669
1670 return 0;
1671}
1672
1673/*
1674 * ocfs2_xattr_ibody_find()
1675 *
1676 * Find extended attribute in inode block and
1677 * fill search info into struct ocfs2_xattr_search.
1678 */
1679static int ocfs2_xattr_ibody_find(struct inode *inode,
1680 int name_index,
1681 const char *name,
1682 struct ocfs2_xattr_search *xs)
1683{
1684 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1685 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1686 int ret;
1687 int has_space = 0;
1688
1689 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1690 return 0;
1691
1692 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1693 down_read(&oi->ip_alloc_sem);
1694 has_space = ocfs2_xattr_has_space_inline(inode, di);
1695 up_read(&oi->ip_alloc_sem);
1696 if (!has_space)
1697 return 0;
1698 }
1699
1700 xs->xattr_bh = xs->inode_bh;
1701 xs->end = (void *)di + inode->i_sb->s_blocksize;
1702 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1703 xs->header = (struct ocfs2_xattr_header *)
1704 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1705 else
1706 xs->header = (struct ocfs2_xattr_header *)
1707 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1708 xs->base = (void *)xs->header;
1709 xs->here = xs->header->xh_entries;
1710
1711 /* Find the named attribute. */
1712 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1713 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1714 if (ret && ret != -ENODATA)
1715 return ret;
1716 xs->not_found = ret;
1717 }
1718
1719 return 0;
1720}
1721
1722/*
1723 * ocfs2_xattr_ibody_set()
1724 *
1725 * Set, replace or remove an extended attribute into inode block.
1726 *
1727 */
1728static int ocfs2_xattr_ibody_set(struct inode *inode,
1729 struct ocfs2_xattr_info *xi,
1730 struct ocfs2_xattr_search *xs)
1731{
1732 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1733 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1734 int ret;
1735
1736 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1737 return -ENOSPC;
1738
1739 down_write(&oi->ip_alloc_sem);
1740 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1741 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1742 ret = -ENOSPC;
1743 goto out;
1744 }
1745 }
1746
1747 ret = ocfs2_xattr_set_entry(inode, xi, xs,
1748 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1749out:
1750 up_write(&oi->ip_alloc_sem);
1751
1752 return ret;
1753}
1754
1755/*
1756 * ocfs2_xattr_block_find()
1757 *
1758 * Find extended attribute in external block and
1759 * fill search info into struct ocfs2_xattr_search.
1760 */
1761static int ocfs2_xattr_block_find(struct inode *inode,
1762 int name_index,
1763 const char *name,
1764 struct ocfs2_xattr_search *xs)
1765{
1766 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1767 struct buffer_head *blk_bh = NULL;
589dc260 1768 struct ocfs2_xattr_block *xb;
cf1d6c76
TY
1769 int ret = 0;
1770
1771 if (!di->i_xattr_loc)
1772 return ret;
1773
0fcaa56a 1774 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
cf1d6c76
TY
1775 if (ret < 0) {
1776 mlog_errno(ret);
1777 return ret;
1778 }
f6087fb7
JB
1779
1780 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1781 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1782 ret = -EIO;
1783 goto cleanup;
cf1d6c76
TY
1784 }
1785
1786 xs->xattr_bh = blk_bh;
589dc260
TM
1787
1788 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1789 xs->header = &xb->xb_attrs.xb_header;
1790 xs->base = (void *)xs->header;
1791 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1792 xs->here = xs->header->xh_entries;
1793
1794 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1795 } else
1796 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1797 name_index,
1798 name, xs);
cf1d6c76 1799
cf1d6c76
TY
1800 if (ret && ret != -ENODATA) {
1801 xs->xattr_bh = NULL;
1802 goto cleanup;
1803 }
1804 xs->not_found = ret;
1805 return 0;
cf1d6c76
TY
1806cleanup:
1807 brelse(blk_bh);
1808
1809 return ret;
1810}
1811
1812/*
1813 * ocfs2_xattr_block_set()
1814 *
1815 * Set, replace or remove an extended attribute into external block.
1816 *
1817 */
1818static int ocfs2_xattr_block_set(struct inode *inode,
1819 struct ocfs2_xattr_info *xi,
1820 struct ocfs2_xattr_search *xs)
1821{
1822 struct buffer_head *new_bh = NULL;
1823 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1824 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1825 struct ocfs2_alloc_context *meta_ac = NULL;
1826 handle_t *handle = NULL;
1827 struct ocfs2_xattr_block *xblk = NULL;
1828 u16 suballoc_bit_start;
1829 u32 num_got;
1830 u64 first_blkno;
1831 int ret;
1832
1833 if (!xs->xattr_bh) {
1834 /*
1835 * Alloc one external block for extended attribute
1836 * outside of inode.
1837 */
1838 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1839 if (ret < 0) {
1840 mlog_errno(ret);
1841 goto out;
1842 }
1843 handle = ocfs2_start_trans(osb,
1844 OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1845 if (IS_ERR(handle)) {
1846 ret = PTR_ERR(handle);
1847 mlog_errno(ret);
1848 goto out;
1849 }
1850 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1851 OCFS2_JOURNAL_ACCESS_CREATE);
1852 if (ret < 0) {
1853 mlog_errno(ret);
1854 goto out_commit;
1855 }
1856
1857 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
1858 &suballoc_bit_start, &num_got,
1859 &first_blkno);
1860 if (ret < 0) {
1861 mlog_errno(ret);
1862 goto out_commit;
1863 }
1864
1865 new_bh = sb_getblk(inode->i_sb, first_blkno);
1866 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1867
1868 ret = ocfs2_journal_access(handle, inode, new_bh,
1869 OCFS2_JOURNAL_ACCESS_CREATE);
1870 if (ret < 0) {
1871 mlog_errno(ret);
1872 goto out_commit;
1873 }
1874
1875 /* Initialize ocfs2_xattr_block */
1876 xs->xattr_bh = new_bh;
1877 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1878 memset(xblk, 0, inode->i_sb->s_blocksize);
1879 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1880 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1881 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1882 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1883 xblk->xb_blkno = cpu_to_le64(first_blkno);
1884
1885 xs->header = &xblk->xb_attrs.xb_header;
1886 xs->base = (void *)xs->header;
1887 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1888 xs->here = xs->header->xh_entries;
1889
1890
1891 ret = ocfs2_journal_dirty(handle, new_bh);
1892 if (ret < 0) {
1893 mlog_errno(ret);
1894 goto out_commit;
1895 }
1896 di->i_xattr_loc = cpu_to_le64(first_blkno);
1897 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1898 if (ret < 0)
1899 mlog_errno(ret);
1900out_commit:
1901 ocfs2_commit_trans(osb, handle);
1902out:
1903 if (meta_ac)
1904 ocfs2_free_alloc_context(meta_ac);
1905 if (ret < 0)
1906 return ret;
01225596
TM
1907 } else
1908 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1909
1910 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1911 /* Set extended attribute into external block */
1912 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1913 if (!ret || ret != -ENOSPC)
1914 goto end;
1915
1916 ret = ocfs2_xattr_create_index_block(inode, xs);
1917 if (ret)
1918 goto end;
cf1d6c76
TY
1919 }
1920
01225596 1921 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
01225596
TM
1922
1923end:
cf1d6c76
TY
1924
1925 return ret;
1926}
1927
1928/*
1929 * ocfs2_xattr_set()
1930 *
1931 * Set, replace or remove an extended attribute for this inode.
1932 * value is NULL to remove an existing extended attribute, else either
1933 * create or replace an extended attribute.
1934 */
1935int ocfs2_xattr_set(struct inode *inode,
1936 int name_index,
1937 const char *name,
1938 const void *value,
1939 size_t value_len,
1940 int flags)
1941{
1942 struct buffer_head *di_bh = NULL;
1943 struct ocfs2_dinode *di;
1944 int ret;
1945
1946 struct ocfs2_xattr_info xi = {
1947 .name_index = name_index,
1948 .name = name,
1949 .value = value,
1950 .value_len = value_len,
1951 };
1952
1953 struct ocfs2_xattr_search xis = {
1954 .not_found = -ENODATA,
1955 };
1956
1957 struct ocfs2_xattr_search xbs = {
1958 .not_found = -ENODATA,
1959 };
1960
8154da3d
TY
1961 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1962 return -EOPNOTSUPP;
1963
cf1d6c76
TY
1964 ret = ocfs2_inode_lock(inode, &di_bh, 1);
1965 if (ret < 0) {
1966 mlog_errno(ret);
1967 return ret;
1968 }
1969 xis.inode_bh = xbs.inode_bh = di_bh;
1970 di = (struct ocfs2_dinode *)di_bh->b_data;
1971
1972 down_write(&OCFS2_I(inode)->ip_xattr_sem);
1973 /*
1974 * Scan inode and external block to find the same name
1975 * extended attribute and collect search infomation.
1976 */
1977 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
1978 if (ret)
1979 goto cleanup;
1980 if (xis.not_found) {
1981 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
1982 if (ret)
1983 goto cleanup;
1984 }
1985
1986 if (xis.not_found && xbs.not_found) {
1987 ret = -ENODATA;
1988 if (flags & XATTR_REPLACE)
1989 goto cleanup;
1990 ret = 0;
1991 if (!value)
1992 goto cleanup;
1993 } else {
1994 ret = -EEXIST;
1995 if (flags & XATTR_CREATE)
1996 goto cleanup;
1997 }
1998
1999 if (!value) {
2000 /* Remove existing extended attribute */
2001 if (!xis.not_found)
2002 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2003 else if (!xbs.not_found)
2004 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2005 } else {
2006 /* We always try to set extended attribute into inode first*/
2007 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2008 if (!ret && !xbs.not_found) {
2009 /*
2010 * If succeed and that extended attribute existing in
2011 * external block, then we will remove it.
2012 */
2013 xi.value = NULL;
2014 xi.value_len = 0;
2015 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2016 } else if (ret == -ENOSPC) {
2017 if (di->i_xattr_loc && !xbs.xattr_bh) {
2018 ret = ocfs2_xattr_block_find(inode, name_index,
2019 name, &xbs);
2020 if (ret)
2021 goto cleanup;
2022 }
2023 /*
2024 * If no space in inode, we will set extended attribute
2025 * into external block.
2026 */
2027 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2028 if (ret)
2029 goto cleanup;
2030 if (!xis.not_found) {
2031 /*
2032 * If succeed and that extended attribute
2033 * existing in inode, we will remove it.
2034 */
2035 xi.value = NULL;
2036 xi.value_len = 0;
2037 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2038 }
2039 }
2040 }
2041cleanup:
2042 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2043 ocfs2_inode_unlock(inode, 1);
2044 brelse(di_bh);
2045 brelse(xbs.xattr_bh);
6dde41d9 2046 ocfs2_xattr_bucket_relse(inode, &xbs.bucket);
cf1d6c76
TY
2047
2048 return ret;
2049}
2050
0c044f0b
TM
2051/*
2052 * Find the xattr extent rec which may contains name_hash.
2053 * e_cpos will be the first name hash of the xattr rec.
2054 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2055 */
2056static int ocfs2_xattr_get_rec(struct inode *inode,
2057 u32 name_hash,
2058 u64 *p_blkno,
2059 u32 *e_cpos,
2060 u32 *num_clusters,
2061 struct ocfs2_extent_list *el)
2062{
2063 int ret = 0, i;
2064 struct buffer_head *eb_bh = NULL;
2065 struct ocfs2_extent_block *eb;
2066 struct ocfs2_extent_rec *rec = NULL;
2067 u64 e_blkno = 0;
2068
2069 if (el->l_tree_depth) {
2070 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2071 if (ret) {
2072 mlog_errno(ret);
2073 goto out;
2074 }
2075
2076 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2077 el = &eb->h_list;
2078
2079 if (el->l_tree_depth) {
2080 ocfs2_error(inode->i_sb,
2081 "Inode %lu has non zero tree depth in "
2082 "xattr tree block %llu\n", inode->i_ino,
2083 (unsigned long long)eb_bh->b_blocknr);
2084 ret = -EROFS;
2085 goto out;
2086 }
2087 }
2088
2089 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2090 rec = &el->l_recs[i];
2091
2092 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2093 e_blkno = le64_to_cpu(rec->e_blkno);
2094 break;
2095 }
2096 }
2097
2098 if (!e_blkno) {
2099 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2100 "record (%u, %u, 0) in xattr", inode->i_ino,
2101 le32_to_cpu(rec->e_cpos),
2102 ocfs2_rec_clusters(el, rec));
2103 ret = -EROFS;
2104 goto out;
2105 }
2106
2107 *p_blkno = le64_to_cpu(rec->e_blkno);
2108 *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2109 if (e_cpos)
2110 *e_cpos = le32_to_cpu(rec->e_cpos);
2111out:
2112 brelse(eb_bh);
2113 return ret;
2114}
2115
/*
 * Signature of a per-bucket callback: it receives the inode, one xattr
 * bucket and an opaque caller-supplied argument, and returns an int
 * status.  (Presumably consumed by the bucket iteration helper that
 * follows; confirm against its definition.)
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
2119
589dc260
TM
2120static int ocfs2_find_xe_in_bucket(struct inode *inode,
2121 struct buffer_head *header_bh,
2122 int name_index,
2123 const char *name,
2124 u32 name_hash,
2125 u16 *xe_index,
2126 int *found)
2127{
2128 int i, ret = 0, cmp = 1, block_off, new_offset;
2129 struct ocfs2_xattr_header *xh =
2130 (struct ocfs2_xattr_header *)header_bh->b_data;
2131 size_t name_len = strlen(name);
2132 struct ocfs2_xattr_entry *xe = NULL;
2133 struct buffer_head *name_bh = NULL;
2134 char *xe_name;
2135
2136 /*
2137 * We don't use binary search in the bucket because there
2138 * may be multiple entries with the same name hash.
2139 */
2140 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2141 xe = &xh->xh_entries[i];
2142
2143 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2144 continue;
2145 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2146 break;
2147
2148 cmp = name_index - ocfs2_xattr_get_type(xe);
2149 if (!cmp)
2150 cmp = name_len - xe->xe_name_len;
2151 if (cmp)
2152 continue;
2153
2154 ret = ocfs2_xattr_bucket_get_name_value(inode,
2155 xh,
2156 i,
2157 &block_off,
2158 &new_offset);
2159 if (ret) {
2160 mlog_errno(ret);
2161 break;
2162 }
2163
0fcaa56a
JB
2164 ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
2165 &name_bh);
589dc260
TM
2166 if (ret) {
2167 mlog_errno(ret);
2168 break;
2169 }
2170 xe_name = name_bh->b_data + new_offset;
2171
2172 cmp = memcmp(name, xe_name, name_len);
2173 brelse(name_bh);
2174 name_bh = NULL;
2175
2176 if (cmp == 0) {
2177 *xe_index = i;
2178 *found = 1;
2179 ret = 0;
2180 break;
2181 }
2182 }
2183
2184 return ret;
2185}
2186
2187/*
2188 * Find the specified xattr entry in a series of buckets.
2189 * This series start from p_blkno and last for num_clusters.
2190 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2191 * the num of the valid buckets.
2192 *
2193 * Return the buffer_head this xattr should reside in. And if the xattr's
2194 * hash is in the gap of 2 buckets, return the lower bucket.
2195 */
2196static int ocfs2_xattr_bucket_find(struct inode *inode,
2197 int name_index,
2198 const char *name,
2199 u32 name_hash,
2200 u64 p_blkno,
2201 u32 first_hash,
2202 u32 num_clusters,
2203 struct ocfs2_xattr_search *xs)
2204{
2205 int ret, found = 0;
2206 struct buffer_head *bh = NULL;
2207 struct buffer_head *lower_bh = NULL;
2208 struct ocfs2_xattr_header *xh = NULL;
2209 struct ocfs2_xattr_entry *xe = NULL;
2210 u16 index = 0;
2211 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2212 int low_bucket = 0, bucket, high_bucket;
2213 u32 last_hash;
2214 u64 blkno;
2215
0fcaa56a 2216 ret = ocfs2_read_block(inode, p_blkno, &bh);
589dc260
TM
2217 if (ret) {
2218 mlog_errno(ret);
2219 goto out;
2220 }
2221
2222 xh = (struct ocfs2_xattr_header *)bh->b_data;
2223 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2224
2225 while (low_bucket <= high_bucket) {
2226 brelse(bh);
2227 bh = NULL;
2228 bucket = (low_bucket + high_bucket) / 2;
2229
2230 blkno = p_blkno + bucket * blk_per_bucket;
2231
0fcaa56a 2232 ret = ocfs2_read_block(inode, blkno, &bh);
589dc260
TM
2233 if (ret) {
2234 mlog_errno(ret);
2235 goto out;
2236 }
2237
2238 xh = (struct ocfs2_xattr_header *)bh->b_data;
2239 xe = &xh->xh_entries[0];
2240 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2241 high_bucket = bucket - 1;
2242 continue;
2243 }
2244
2245 /*
2246 * Check whether the hash of the last entry in our
5a095611
TM
2247 * bucket is larger than the search one. for an empty
2248 * bucket, the last one is also the first one.
589dc260 2249 */
5a095611
TM
2250 if (xh->xh_count)
2251 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2252
589dc260
TM
2253 last_hash = le32_to_cpu(xe->xe_name_hash);
2254
2255 /* record lower_bh which may be the insert place. */
2256 brelse(lower_bh);
2257 lower_bh = bh;
2258 bh = NULL;
2259
2260 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2261 low_bucket = bucket + 1;
2262 continue;
2263 }
2264
2265 /* the searched xattr should reside in this bucket if exists. */
2266 ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
2267 name_index, name, name_hash,
2268 &index, &found);
2269 if (ret) {
2270 mlog_errno(ret);
2271 goto out;
2272 }
2273 break;
2274 }
2275
2276 /*
2277 * Record the bucket we have found.
2278 * When the xattr's hash value is in the gap of 2 buckets, we will
2279 * always set it to the previous bucket.
2280 */
2281 if (!lower_bh) {
2282 /*
2283 * We can't find any bucket whose first name_hash is less
2284 * than the find name_hash.
2285 */
2286 BUG_ON(bh->b_blocknr != p_blkno);
2287 lower_bh = bh;
2288 bh = NULL;
2289 }
4ac6032d 2290 xs->bucket.bu_bhs[0] = lower_bh;
589dc260
TM
2291 lower_bh = NULL;
2292
3e632946 2293 xs->header = bucket_xh(&xs->bucket);
51def39f 2294 xs->base = bucket_block(&xs->bucket, 0);
589dc260
TM
2295 xs->end = xs->base + inode->i_sb->s_blocksize;
2296
2297 if (found) {
2298 /*
2299 * If we have found the xattr enty, read all the blocks in
2300 * this bucket.
2301 */
9c7759aa 2302 ret = ocfs2_read_blocks(inode, bucket_blkno(&xs->bucket) + 1,
4ac6032d 2303 blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
1efd47f8 2304 0);
589dc260
TM
2305 if (ret) {
2306 mlog_errno(ret);
2307 goto out;
2308 }
2309
2310 xs->here = &xs->header->xh_entries[index];
2311 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
9c7759aa 2312 (unsigned long long)bucket_blkno(&xs->bucket), index);
589dc260
TM
2313 } else
2314 ret = -ENODATA;
2315
2316out:
2317 brelse(bh);
2318 brelse(lower_bh);
2319 return ret;
2320}
2321
2322static int ocfs2_xattr_index_block_find(struct inode *inode,
2323 struct buffer_head *root_bh,
2324 int name_index,
2325 const char *name,
2326 struct ocfs2_xattr_search *xs)
2327{
2328 int ret;
2329 struct ocfs2_xattr_block *xb =
2330 (struct ocfs2_xattr_block *)root_bh->b_data;
2331 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2332 struct ocfs2_extent_list *el = &xb_root->xt_list;
2333 u64 p_blkno = 0;
2334 u32 first_hash, num_clusters = 0;
2057e5c6 2335 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
589dc260
TM
2336
2337 if (le16_to_cpu(el->l_next_free_rec) == 0)
2338 return -ENODATA;
2339
2340 mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2341 name, name_hash, name_index);
2342
2343 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2344 &num_clusters, el);
2345 if (ret) {
2346 mlog_errno(ret);
2347 goto out;
2348 }
2349
2350 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2351
2352 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
de29c085
MF
2353 "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
2354 first_hash);
589dc260
TM
2355
2356 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2357 p_blkno, first_hash, num_clusters, xs);
2358
2359out:
2360 return ret;
2361}
2362
0c044f0b
TM
2363static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2364 u64 blkno,
2365 u32 clusters,
2366 xattr_bucket_func *func,
2367 void *para)
2368{
6dde41d9 2369 int i, ret = 0;
0c044f0b
TM
2370 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2371 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2372 u32 num_buckets = clusters * bpc;
2373 struct ocfs2_xattr_bucket bucket;
2374
2375 memset(&bucket, 0, sizeof(bucket));
2376
2377 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
de29c085 2378 clusters, (unsigned long long)blkno);
0c044f0b
TM
2379
2380 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
31d33073 2381 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
4ac6032d 2382 bucket.bu_bhs, 0);
0c044f0b
TM
2383 if (ret) {
2384 mlog_errno(ret);
2385 goto out;
2386 }
2387
0c044f0b
TM
2388 /*
2389 * The real bucket num in this series of blocks is stored
2390 * in the 1st bucket.
2391 */
2392 if (i == 0)
3e632946 2393 num_buckets = le16_to_cpu(bucket_xh(&bucket)->xh_num_buckets);
0c044f0b 2394
de29c085
MF
2395 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2396 (unsigned long long)blkno,
3e632946 2397 le32_to_cpu(bucket_xh(&bucket)->xh_entries[0].xe_name_hash));
0c044f0b
TM
2398 if (func) {
2399 ret = func(inode, &bucket, para);
2400 if (ret) {
2401 mlog_errno(ret);
2402 break;
2403 }
2404 }
2405
6dde41d9 2406 ocfs2_xattr_bucket_relse(inode, &bucket);
0c044f0b
TM
2407 memset(&bucket, 0, sizeof(bucket));
2408 }
2409
2410out:
6dde41d9 2411 ocfs2_xattr_bucket_relse(inode, &bucket);
0c044f0b
TM
2412
2413 return ret;
2414}
2415
2416struct ocfs2_xattr_tree_list {
2417 char *buffer;
2418 size_t buffer_size;
936b8834 2419 size_t result;
0c044f0b
TM
2420};
2421
2422static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2423 struct ocfs2_xattr_header *xh,
2424 int index,
2425 int *block_off,
2426 int *new_offset)
2427{
2428 u16 name_offset;
2429
2430 if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2431 return -EINVAL;
2432
2433 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2434
2435 *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2436 *new_offset = name_offset % inode->i_sb->s_blocksize;
2437
2438 return 0;
2439}
2440
2441static int ocfs2_list_xattr_bucket(struct inode *inode,
2442 struct ocfs2_xattr_bucket *bucket,
2443 void *para)
2444{
936b8834 2445 int ret = 0, type;
0c044f0b 2446 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
0c044f0b 2447 int i, block_off, new_offset;
936b8834 2448 const char *prefix, *name;
0c044f0b 2449
3e632946
JB
2450 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
2451 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
936b8834
TM
2452 type = ocfs2_xattr_get_type(entry);
2453 prefix = ocfs2_xattr_prefix(type);
0c044f0b 2454
936b8834 2455 if (prefix) {
0c044f0b 2456 ret = ocfs2_xattr_bucket_get_name_value(inode,
3e632946 2457 bucket_xh(bucket),
0c044f0b
TM
2458 i,
2459 &block_off,
2460 &new_offset);
2461 if (ret)
2462 break;
936b8834 2463
51def39f 2464 name = (const char *)bucket_block(bucket, block_off) +
936b8834
TM
2465 new_offset;
2466 ret = ocfs2_xattr_list_entry(xl->buffer,
2467 xl->buffer_size,
2468 &xl->result,
2469 prefix, name,
2470 entry->xe_name_len);
2471 if (ret)
2472 break;
0c044f0b
TM
2473 }
2474 }
2475
2476 return ret;
2477}
2478
2479static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2480 struct ocfs2_xattr_tree_root *xt,
2481 char *buffer,
2482 size_t buffer_size)
2483{
2484 struct ocfs2_extent_list *el = &xt->xt_list;
2485 int ret = 0;
2486 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2487 u64 p_blkno = 0;
2488 struct ocfs2_xattr_tree_list xl = {
2489 .buffer = buffer,
2490 .buffer_size = buffer_size,
936b8834 2491 .result = 0,
0c044f0b
TM
2492 };
2493
2494 if (le16_to_cpu(el->l_next_free_rec) == 0)
2495 return 0;
2496
2497 while (name_hash > 0) {
2498 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2499 &e_cpos, &num_clusters, el);
2500 if (ret) {
2501 mlog_errno(ret);
2502 goto out;
2503 }
2504
2505 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2506 ocfs2_list_xattr_bucket,
2507 &xl);
2508 if (ret) {
2509 mlog_errno(ret);
2510 goto out;
2511 }
2512
2513 if (e_cpos == 0)
2514 break;
2515
2516 name_hash = e_cpos - 1;
2517 }
2518
936b8834 2519 ret = xl.result;
0c044f0b
TM
2520out:
2521 return ret;
2522}
01225596
TM
2523
2524static int cmp_xe(const void *a, const void *b)
2525{
2526 const struct ocfs2_xattr_entry *l = a, *r = b;
2527 u32 l_hash = le32_to_cpu(l->xe_name_hash);
2528 u32 r_hash = le32_to_cpu(r->xe_name_hash);
2529
2530 if (l_hash > r_hash)
2531 return 1;
2532 if (l_hash < r_hash)
2533 return -1;
2534 return 0;
2535}
2536
2537static void swap_xe(void *a, void *b, int size)
2538{
2539 struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2540
2541 tmp = *l;
2542 memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2543 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2544}
2545
2546/*
2547 * When the ocfs2_xattr_block is filled up, new bucket will be created
2548 * and all the xattr entries will be moved to the new bucket.
2549 * Note: we need to sort the entries since they are not saved in order
2550 * in the ocfs2_xattr_block.
2551 */
2552static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2553 struct buffer_head *xb_bh,
2554 struct buffer_head *xh_bh,
2555 struct buffer_head *data_bh)
2556{
2557 int i, blocksize = inode->i_sb->s_blocksize;
2558 u16 offset, size, off_change;
2559 struct ocfs2_xattr_entry *xe;
2560 struct ocfs2_xattr_block *xb =
2561 (struct ocfs2_xattr_block *)xb_bh->b_data;
2562 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2563 struct ocfs2_xattr_header *xh =
2564 (struct ocfs2_xattr_header *)xh_bh->b_data;
2565 u16 count = le16_to_cpu(xb_xh->xh_count);
2566 char *target = xh_bh->b_data, *src = xb_bh->b_data;
2567
2568 mlog(0, "cp xattr from block %llu to bucket %llu\n",
2569 (unsigned long long)xb_bh->b_blocknr,
2570 (unsigned long long)xh_bh->b_blocknr);
2571
2572 memset(xh_bh->b_data, 0, blocksize);
2573 if (data_bh)
2574 memset(data_bh->b_data, 0, blocksize);
2575 /*
2576 * Since the xe_name_offset is based on ocfs2_xattr_header,
2577 * there is a offset change corresponding to the change of
2578 * ocfs2_xattr_header's position.
2579 */
2580 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2581 xe = &xb_xh->xh_entries[count - 1];
2582 offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2583 size = blocksize - offset;
2584
2585 /* copy all the names and values. */
2586 if (data_bh)
2587 target = data_bh->b_data;
2588 memcpy(target + offset, src + offset, size);
2589
2590 /* Init new header now. */
2591 xh->xh_count = xb_xh->xh_count;
2592 xh->xh_num_buckets = cpu_to_le16(1);
2593 xh->xh_name_value_len = cpu_to_le16(size);
2594 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2595
2596 /* copy all the entries. */
2597 target = xh_bh->b_data;
2598 offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2599 size = count * sizeof(struct ocfs2_xattr_entry);
2600 memcpy(target + offset, (char *)xb_xh + offset, size);
2601
2602 /* Change the xe offset for all the xe because of the move. */
2603 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2604 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2605 for (i = 0; i < count; i++)
2606 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2607
2608 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2609 offset, size, off_change);
2610
2611 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2612 cmp_xe, swap_xe);
2613}
2614
2615/*
2616 * After we move xattr from block to index btree, we have to
2617 * update ocfs2_xattr_search to the new xe and base.
2618 *
2619 * When the entry is in xattr block, xattr_bh indicates the storage place.
2620 * While if the entry is in index b-tree, "bucket" indicates the
2621 * real place of the xattr.
2622 */
2623static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2624 struct ocfs2_xattr_search *xs,
2625 struct buffer_head *old_bh,
2626 struct buffer_head *new_bh)
2627{
2628 int ret = 0;
2629 char *buf = old_bh->b_data;
2630 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2631 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2632 int i, blocksize = inode->i_sb->s_blocksize;
2633 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2634
4ac6032d 2635 xs->bucket.bu_bhs[0] = new_bh;
01225596 2636 get_bh(new_bh);
3e632946 2637 xs->header = bucket_xh(&xs->bucket);
01225596
TM
2638
2639 xs->base = new_bh->b_data;
2640 xs->end = xs->base + inode->i_sb->s_blocksize;
2641
2642 if (!xs->not_found) {
2643 if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
31d33073 2644 ret = ocfs2_read_blocks(inode,
9c7759aa 2645 bucket_blkno(&xs->bucket) + 1,
4ac6032d 2646 blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
1efd47f8 2647 0);
01225596
TM
2648 if (ret) {
2649 mlog_errno(ret);
2650 return ret;
2651 }
2652
01225596 2653 }
83099bc6
TM
2654 i = xs->here - old_xh->xh_entries;
2655 xs->here = &xs->header->xh_entries[i];
01225596
TM
2656 }
2657
2658 return ret;
2659}
2660
2661static int ocfs2_xattr_create_index_block(struct inode *inode,
2662 struct ocfs2_xattr_search *xs)
2663{
2664 int ret, credits = OCFS2_SUBALLOC_ALLOC;
2665 u32 bit_off, len;
2666 u64 blkno;
2667 handle_t *handle;
2668 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2669 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2670 struct ocfs2_alloc_context *data_ac;
2671 struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2672 struct buffer_head *xb_bh = xs->xattr_bh;
2673 struct ocfs2_xattr_block *xb =
2674 (struct ocfs2_xattr_block *)xb_bh->b_data;
2675 struct ocfs2_xattr_tree_root *xr;
2676 u16 xb_flags = le16_to_cpu(xb->xb_flags);
2677 u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2678
2679 mlog(0, "create xattr index block for %llu\n",
2680 (unsigned long long)xb_bh->b_blocknr);
2681
2682 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2683
2684 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2685 if (ret) {
2686 mlog_errno(ret);
2687 goto out;
2688 }
2689
2690 /*
2691 * XXX:
2692 * We can use this lock for now, and maybe move to a dedicated mutex
2693 * if performance becomes a problem later.
2694 */
2695 down_write(&oi->ip_alloc_sem);
2696
2697 /*
2698 * 3 more credits, one for xattr block update, one for the 1st block
2699 * of the new xattr bucket and one for the value/data.
2700 */
2701 credits += 3;
2702 handle = ocfs2_start_trans(osb, credits);
2703 if (IS_ERR(handle)) {
2704 ret = PTR_ERR(handle);
2705 mlog_errno(ret);
2706 goto out_sem;
2707 }
2708
2709 ret = ocfs2_journal_access(handle, inode, xb_bh,
2710 OCFS2_JOURNAL_ACCESS_WRITE);
2711 if (ret) {
2712 mlog_errno(ret);
2713 goto out_commit;
2714 }
2715
2716 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2717 if (ret) {
2718 mlog_errno(ret);
2719 goto out_commit;
2720 }
2721
2722 /*
2723 * The bucket may spread in many blocks, and
2724 * we will only touch the 1st block and the last block
2725 * in the whole bucket(one for entry and one for data).
2726 */
2727 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2728
de29c085
MF
2729 mlog(0, "allocate 1 cluster from %llu to xattr block\n",
2730 (unsigned long long)blkno);
01225596
TM
2731
2732 xh_bh = sb_getblk(inode->i_sb, blkno);
2733 if (!xh_bh) {
2734 ret = -EIO;
2735 mlog_errno(ret);
2736 goto out_commit;
2737 }
2738
2739 ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2740
2741 ret = ocfs2_journal_access(handle, inode, xh_bh,
2742 OCFS2_JOURNAL_ACCESS_CREATE);
2743 if (ret) {
2744 mlog_errno(ret);
2745 goto out_commit;
2746 }
2747
2748 if (bpb > 1) {
2749 data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2750 if (!data_bh) {
2751 ret = -EIO;
2752 mlog_errno(ret);
2753 goto out_commit;
2754 }
2755
2756 ocfs2_set_new_buffer_uptodate(inode, data_bh);
2757
2758 ret = ocfs2_journal_access(handle, inode, data_bh,
2759 OCFS2_JOURNAL_ACCESS_CREATE);
2760 if (ret) {
2761 mlog_errno(ret);
2762 goto out_commit;
2763 }
2764 }
2765
2766 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2767
2768 ocfs2_journal_dirty(handle, xh_bh);
2769 if (data_bh)
2770 ocfs2_journal_dirty(handle, data_bh);
2771
bd60bd37
JB
2772 ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2773 if (ret) {
2774 mlog_errno(ret);
2775 goto out_commit;
2776 }
01225596
TM
2777
2778 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2779 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2780 offsetof(struct ocfs2_xattr_block, xb_attrs));
2781
2782 xr = &xb->xb_attrs.xb_root;
2783 xr->xt_clusters = cpu_to_le32(1);
2784 xr->xt_last_eb_blk = 0;
2785 xr->xt_list.l_tree_depth = 0;
2786 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2787 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2788
2789 xr->xt_list.l_recs[0].e_cpos = 0;
2790 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2791 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2792
2793 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2794
2795 ret = ocfs2_journal_dirty(handle, xb_bh);
2796 if (ret) {
2797 mlog_errno(ret);
2798 goto out_commit;
2799 }
2800
2801out_commit:
2802 ocfs2_commit_trans(osb, handle);
2803
2804out_sem:
2805 up_write(&oi->ip_alloc_sem);
2806
2807out:
2808 if (data_ac)
2809 ocfs2_free_alloc_context(data_ac);
2810
2811 brelse(xh_bh);
2812 brelse(data_bh);
2813
2814 return ret;
2815}
2816
2817static int cmp_xe_offset(const void *a, const void *b)
2818{
2819 const struct ocfs2_xattr_entry *l = a, *r = b;
2820 u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2821 u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2822
2823 if (l_name_offset < r_name_offset)
2824 return 1;
2825 if (l_name_offset > r_name_offset)
2826 return -1;
2827 return 0;
2828}
2829
2830/*
2831 * defrag a xattr bucket if we find that the bucket has some
2832 * holes beteen name/value pairs.
2833 * We will move all the name/value pairs to the end of the bucket
2834 * so that we can spare some space for insertion.
2835 */
2836static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2837 struct ocfs2_xattr_bucket *bucket)
2838{
2839 int ret, i;
2840 size_t end, offset, len, value_len;
2841 struct ocfs2_xattr_header *xh;
2842 char *entries, *buf, *bucket_buf = NULL;
9c7759aa 2843 u64 blkno = bucket_blkno(bucket);
01225596
TM
2844 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2845 u16 xh_free_start;
01225596
TM
2846 size_t blocksize = inode->i_sb->s_blocksize;
2847 handle_t *handle;
2848 struct buffer_head **bhs;
2849 struct ocfs2_xattr_entry *xe;
2850
2851 bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2852 GFP_NOFS);
2853 if (!bhs)
2854 return -ENOMEM;
2855
1efd47f8 2856 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
01225596
TM
2857 if (ret)
2858 goto out;
2859
2860 /*
2861 * In order to make the operation more efficient and generic,
2862 * we copy all the blocks into a contiguous memory and do the
2863 * defragment there, so if anything is error, we will not touch
2864 * the real block.
2865 */
2866 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2867 if (!bucket_buf) {
2868 ret = -EIO;
2869 goto out;
2870 }
2871
2872 buf = bucket_buf;
2873 for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2874 memcpy(buf, bhs[i]->b_data, blocksize);
2875
2876 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2877 if (IS_ERR(handle)) {
2878 ret = PTR_ERR(handle);
2879 handle = NULL;
2880 mlog_errno(ret);
2881 goto out;
2882 }
2883
2884 for (i = 0; i < blk_per_bucket; i++) {
2885 ret = ocfs2_journal_access(handle, inode, bhs[i],
2886 OCFS2_JOURNAL_ACCESS_WRITE);
2887 if (ret < 0) {
2888 mlog_errno(ret);
2889 goto commit;
2890 }
2891 }
2892
2893 xh = (struct ocfs2_xattr_header *)bucket_buf;
2894 entries = (char *)xh->xh_entries;
2895 xh_free_start = le16_to_cpu(xh->xh_free_start);
2896
2897 mlog(0, "adjust xattr bucket in %llu, count = %u, "
2898 "xh_free_start = %u, xh_name_value_len = %u.\n",
de29c085
MF
2899 (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
2900 xh_free_start, le16_to_cpu(xh->xh_name_value_len));
01225596
TM
2901
2902 /*
2903 * sort all the entries by their offset.
2904 * the largest will be the first, so that we can
2905 * move them to the end one by one.
2906 */
2907 sort(entries, le16_to_cpu(xh->xh_count),
2908 sizeof(struct ocfs2_xattr_entry),
2909 cmp_xe_offset, swap_xe);
2910
2911 /* Move all name/values to the end of the bucket. */
2912 xe = xh->xh_entries;
2913 end = OCFS2_XATTR_BUCKET_SIZE;
2914 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2915 offset = le16_to_cpu(xe->xe_name_offset);
2916 if (ocfs2_xattr_is_local(xe))
2917 value_len = OCFS2_XATTR_SIZE(
2918 le64_to_cpu(xe->xe_value_size));
2919 else
2920 value_len = OCFS2_XATTR_ROOT_SIZE;
2921 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2922
2923 /*
2924 * We must make sure that the name/value pair
2925 * exist in the same block. So adjust end to
2926 * the previous block end if needed.
2927 */
2928 if (((end - len) / blocksize !=
2929 (end - 1) / blocksize))
2930 end = end - end % blocksize;
2931
2932 if (end > offset + len) {
2933 memmove(bucket_buf + end - len,
2934 bucket_buf + offset, len);
2935 xe->xe_name_offset = cpu_to_le16(end - len);
2936 }
2937
2938 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2939 "bucket %llu\n", (unsigned long long)blkno);
2940
2941 end -= len;
2942 }
2943
2944 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2945 "bucket %llu\n", (unsigned long long)blkno);
2946
2947 if (xh_free_start == end)
2948 goto commit;
2949
2950 memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2951 xh->xh_free_start = cpu_to_le16(end);
2952
2953 /* sort the entries by their name_hash. */
2954 sort(entries, le16_to_cpu(xh->xh_count),
2955 sizeof(struct ocfs2_xattr_entry),
2956 cmp_xe, swap_xe);
2957
2958 buf = bucket_buf;
2959 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
2960 memcpy(bhs[i]->b_data, buf, blocksize);
2961 ocfs2_journal_dirty(handle, bhs[i]);
2962 }
2963
2964commit:
2965 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2966out:
2967
2968 if (bhs) {
2969 for (i = 0; i < blk_per_bucket; i++)
2970 brelse(bhs[i]);
2971 }
2972 kfree(bhs);
2973
2974 kfree(bucket_buf);
2975 return ret;
2976}
2977
2978/*
2979 * Move half nums of the xattr bucket in the previous cluster to this new
2980 * cluster. We only touch the last cluster of the previous extend record.
2981 *
2982 * first_bh is the first buffer_head of a series of bucket in the same
2983 * extent rec and header_bh is the header of one bucket in this cluster.
2984 * They will be updated if we move the data header_bh contains to the new
2985 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
2986 */
2987static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
2988 handle_t *handle,
2989 struct buffer_head **first_bh,
2990 struct buffer_head **header_bh,
2991 u64 new_blkno,
2992 u64 prev_blkno,
2993 u32 num_clusters,
2994 u32 *first_hash)
2995{
2996 int i, ret, credits;
2997 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2998 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
2999 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3000 int blocksize = inode->i_sb->s_blocksize;
3001 struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3002 struct ocfs2_xattr_header *new_xh;
3003 struct ocfs2_xattr_header *xh =
3004 (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3005
3006 BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3007 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3008
3009 prev_bh = *first_bh;
3010 get_bh(prev_bh);
3011 xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3012
3013 prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3014
3015 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
de29c085 3016 (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
01225596
TM
3017
3018 /*
3019 * We need to update the 1st half of the new cluster and
3020 * 1 more for the update of the 1st bucket of the previous
3021 * extent record.
3022 */
3023 credits = bpc / 2 + 1;
3024 ret = ocfs2_extend_trans(handle, credits);
3025 if (ret) {
3026 mlog_errno(ret);
3027 goto out;
3028 }
3029
3030 ret = ocfs2_journal_access(handle, inode, prev_bh,
3031 OCFS2_JOURNAL_ACCESS_WRITE);
3032 if (ret) {
3033 mlog_errno(ret);
3034 goto out;
3035 }
3036
3037 for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3038 old_bh = new_bh = NULL;
3039 new_bh = sb_getblk(inode->i_sb, new_blkno);
3040 if (!new_bh) {
3041 ret = -EIO;
3042 mlog_errno(ret);
3043 goto out;
3044 }
3045
3046 ocfs2_set_new_buffer_uptodate(inode, new_bh);
3047
3048 ret = ocfs2_journal_access(handle, inode, new_bh,
3049 OCFS2_JOURNAL_ACCESS_CREATE);
3050 if (ret < 0) {
3051 mlog_errno(ret);
3052 brelse(new_bh);
3053 goto out;
3054 }
3055
0fcaa56a 3056 ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
01225596
TM
3057 if (ret < 0) {
3058 mlog_errno(ret);
3059 brelse(new_bh);
3060 goto out;
3061 }
3062
3063 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3064
3065 if (i == 0) {
3066 new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3067 new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3068
3069 if (first_hash)
3070 *first_hash = le32_to_cpu(
3071 new_xh->xh_entries[0].xe_name_hash);
3072 new_first_bh = new_bh;
3073 get_bh(new_first_bh);
3074 }
3075
3076 ocfs2_journal_dirty(handle, new_bh);
3077
3078 if (*header_bh == old_bh) {
3079 brelse(*header_bh);
3080 *header_bh = new_bh;
3081 get_bh(*header_bh);
3082
3083 brelse(*first_bh);
3084 *first_bh = new_first_bh;
3085 get_bh(*first_bh);
3086 }
3087 brelse(new_bh);
3088 brelse(old_bh);
3089 }
3090
3091 le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3092
3093 ocfs2_journal_dirty(handle, prev_bh);
3094out:
3095 brelse(prev_bh);
3096 brelse(new_first_bh);
3097 return ret;
3098}
3099
3100static int ocfs2_read_xattr_bucket(struct inode *inode,
3101 u64 blkno,
3102 struct buffer_head **bhs,
3103 int new)
3104{
3105 int ret = 0;
3106 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3107
3108 if (!new)
31d33073 3109 return ocfs2_read_blocks(inode, blkno,
1efd47f8 3110 blk_per_bucket, bhs, 0);
01225596
TM
3111
3112 for (i = 0; i < blk_per_bucket; i++) {
3113 bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3114 if (bhs[i] == NULL) {
3115 ret = -EIO;
3116 mlog_errno(ret);
3117 break;
3118 }
3119 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3120 }
3121
3122 return ret;
3123}
3124
3125/*
80bcaf34
TM
3126 * Find the suitable pos when we divide a bucket into 2.
3127 * We have to make sure the xattrs with the same hash value exist
3128 * in the same bucket.
3129 *
3130 * If this ocfs2_xattr_header covers more than one hash value, find a
3131 * place where the hash value changes. Try to find the most even split.
3132 * The most common case is that all entries have different hash values,
3133 * and the first check we make will find a place to split.
3134 */
3135static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3136{
3137 struct ocfs2_xattr_entry *entries = xh->xh_entries;
3138 int count = le16_to_cpu(xh->xh_count);
3139 int delta, middle = count / 2;
3140
3141 /*
3142 * We start at the middle. Each step gets farther away in both
3143 * directions. We therefore hit the change in hash value
3144 * nearest to the middle. Note that this loop does not execute for
3145 * count < 2.
3146 */
3147 for (delta = 0; delta < middle; delta++) {
3148 /* Let's check delta earlier than middle */
3149 if (cmp_xe(&entries[middle - delta - 1],
3150 &entries[middle - delta]))
3151 return middle - delta;
3152
3153 /* For even counts, don't walk off the end */
3154 if ((middle + delta + 1) == count)
3155 continue;
3156
3157 /* Now try delta past middle */
3158 if (cmp_xe(&entries[middle + delta],
3159 &entries[middle + delta + 1]))
3160 return middle + delta + 1;
3161 }
3162
3163 /* Every entry had the same hash */
3164 return count;
3165}
3166
3167/*
3168 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
01225596 3169 * first_hash will record the 1st hash of the new bucket.
80bcaf34
TM
3170 *
3171 * Normally half of the xattrs will be moved. But we have to make
3172 * sure that the xattrs with the same hash value are stored in the
3173 * same bucket. If all the xattrs in this bucket have the same hash
3174 * value, the new bucket will be initialized as an empty one and the
3175 * first_hash will be initialized as (hash_value+1).
01225596 3176 */
80bcaf34
TM
3177static int ocfs2_divide_xattr_bucket(struct inode *inode,
3178 handle_t *handle,
3179 u64 blk,
3180 u64 new_blk,
3181 u32 *first_hash,
3182 int new_bucket_head)
01225596
TM
3183{
3184 int ret, i;
80bcaf34 3185 int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
01225596
TM
3186 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3187 struct buffer_head **s_bhs, **t_bhs = NULL;
3188 struct ocfs2_xattr_header *xh;
3189 struct ocfs2_xattr_entry *xe;
3190 int blocksize = inode->i_sb->s_blocksize;
3191
80bcaf34 3192 mlog(0, "move some of xattrs from bucket %llu to %llu\n",
de29c085 3193 (unsigned long long)blk, (unsigned long long)new_blk);
01225596
TM
3194
3195 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3196 if (!s_bhs)
3197 return -ENOMEM;
3198
3199 ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3200 if (ret) {
3201 mlog_errno(ret);
3202 goto out;
3203 }
3204
3205 ret = ocfs2_journal_access(handle, inode, s_bhs[0],
3206 OCFS2_JOURNAL_ACCESS_WRITE);
3207 if (ret) {
3208 mlog_errno(ret);
3209 goto out;
3210 }
3211
3212 t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3213 if (!t_bhs) {
3214 ret = -ENOMEM;
3215 goto out;
3216 }
3217
3218 ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
3219 if (ret) {
3220 mlog_errno(ret);
3221 goto out;
3222 }
3223
3224 for (i = 0; i < blk_per_bucket; i++) {
3225 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
eb6ff239
JB
3226 new_bucket_head ?
3227 OCFS2_JOURNAL_ACCESS_CREATE :
3228 OCFS2_JOURNAL_ACCESS_WRITE);
01225596
TM
3229 if (ret) {
3230 mlog_errno(ret);
3231 goto out;
3232 }
3233 }
3234
80bcaf34
TM
3235 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3236 count = le16_to_cpu(xh->xh_count);
3237 start = ocfs2_xattr_find_divide_pos(xh);
3238
3239 if (start == count) {
3240 xe = &xh->xh_entries[start-1];
3241
3242 /*
3243 * initialized a new empty bucket here.
3244 * The hash value is set as one larger than
3245 * that of the last entry in the previous bucket.
3246 */
3247 for (i = 0; i < blk_per_bucket; i++)
3248 memset(t_bhs[i]->b_data, 0, blocksize);
3249
3250 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3251 xh->xh_free_start = cpu_to_le16(blocksize);
3252 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3253 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3254
3255 goto set_num_buckets;
3256 }
3257
01225596
TM
3258 /* copy the whole bucket to the new first. */
3259 for (i = 0; i < blk_per_bucket; i++)
3260 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3261
3262 /* update the new bucket. */
3263 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
01225596
TM
3264
3265 /*
3266 * Calculate the total name/value len and xh_free_start for
3267 * the old bucket first.
3268 */
3269 name_offset = OCFS2_XATTR_BUCKET_SIZE;
3270 name_value_len = 0;
3271 for (i = 0; i < start; i++) {
3272 xe = &xh->xh_entries[i];
3273 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3274 if (ocfs2_xattr_is_local(xe))
3275 xe_len +=
3276 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3277 else
3278 xe_len += OCFS2_XATTR_ROOT_SIZE;
3279 name_value_len += xe_len;
3280 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3281 name_offset = le16_to_cpu(xe->xe_name_offset);
3282 }
3283
3284 /*
3285 * Now begin the modification to the new bucket.
3286 *
3287 * In the new bucket, We just move the xattr entry to the beginning
3288 * and don't touch the name/value. So there will be some holes in the
3289 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3290 * called.
3291 */
3292 xe = &xh->xh_entries[start];
3293 len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3294 mlog(0, "mv xattr entry len %d from %d to %d\n", len,
ff1ec20e
MF
3295 (int)((char *)xe - (char *)xh),
3296 (int)((char *)xh->xh_entries - (char *)xh));
01225596
TM
3297 memmove((char *)xh->xh_entries, (char *)xe, len);
3298 xe = &xh->xh_entries[count - start];
3299 len = sizeof(struct ocfs2_xattr_entry) * start;
3300 memset((char *)xe, 0, len);
3301
3302 le16_add_cpu(&xh->xh_count, -start);
3303 le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3304
3305 /* Calculate xh_free_start for the new bucket. */
3306 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3307 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3308 xe = &xh->xh_entries[i];
3309 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3310 if (ocfs2_xattr_is_local(xe))
3311 xe_len +=
3312 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3313 else
3314 xe_len += OCFS2_XATTR_ROOT_SIZE;
3315 if (le16_to_cpu(xe->xe_name_offset) <
3316 le16_to_cpu(xh->xh_free_start))
3317 xh->xh_free_start = xe->xe_name_offset;
3318 }
3319
80bcaf34 3320set_num_buckets:
01225596
TM
3321 /* set xh->xh_num_buckets for the new xh. */
3322 if (new_bucket_head)
3323 xh->xh_num_buckets = cpu_to_le16(1);
3324 else
3325 xh->xh_num_buckets = 0;
3326
3327 for (i = 0; i < blk_per_bucket; i++) {
3328 ocfs2_journal_dirty(handle, t_bhs[i]);
3329 if (ret)
3330 mlog_errno(ret);
3331 }
3332
3333 /* store the first_hash of the new bucket. */
3334 if (first_hash)
3335 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3336
3337 /*
80bcaf34
TM
3338 * Now only update the 1st block of the old bucket. If we
3339 * just added a new empty bucket, there is no need to modify
3340 * it.
01225596 3341 */
80bcaf34
TM
3342 if (start == count)
3343 goto out;
3344
01225596
TM
3345 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3346 memset(&xh->xh_entries[start], 0,
3347 sizeof(struct ocfs2_xattr_entry) * (count - start));
3348 xh->xh_count = cpu_to_le16(start);
3349 xh->xh_free_start = cpu_to_le16(name_offset);
3350 xh->xh_name_value_len = cpu_to_le16(name_value_len);
3351
3352 ocfs2_journal_dirty(handle, s_bhs[0]);
3353 if (ret)
3354 mlog_errno(ret);
3355
3356out:
3357 if (s_bhs) {
3358 for (i = 0; i < blk_per_bucket; i++)
3359 brelse(s_bhs[i]);
3360 }
3361 kfree(s_bhs);
3362
3363 if (t_bhs) {
3364 for (i = 0; i < blk_per_bucket; i++)
3365 brelse(t_bhs[i]);
3366 }
3367 kfree(t_bhs);
3368
3369 return ret;
3370}
3371
3372/*
3373 * Copy xattr from one bucket to another bucket.
3374 *
3375 * The caller must make sure that the journal transaction
3376 * has enough space for journaling.
3377 */
3378static int ocfs2_cp_xattr_bucket(struct inode *inode,
3379 handle_t *handle,
3380 u64 s_blkno,
3381 u64 t_blkno,
3382 int t_is_new)
3383{
3384 int ret, i;
3385 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3386 int blocksize = inode->i_sb->s_blocksize;
3387 struct buffer_head **s_bhs, **t_bhs = NULL;
3388
3389 BUG_ON(s_blkno == t_blkno);
3390
3391 mlog(0, "cp bucket %llu to %llu, target is %d\n",
de29c085
MF
3392 (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3393 t_is_new);
01225596
TM
3394
3395 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3396 GFP_NOFS);
3397 if (!s_bhs)
3398 return -ENOMEM;
3399
3400 ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
3401 if (ret)
3402 goto out;
3403
3404 t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3405 GFP_NOFS);
3406 if (!t_bhs) {
3407 ret = -ENOMEM;
3408 goto out;
3409 }
3410
3411 ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
3412 if (ret)
3413 goto out;
3414
3415 for (i = 0; i < blk_per_bucket; i++) {
3416 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
eb6ff239
JB
3417 t_is_new ?
3418 OCFS2_JOURNAL_ACCESS_CREATE :
01225596
TM
3419 OCFS2_JOURNAL_ACCESS_WRITE);
3420 if (ret)
3421 goto out;
3422 }
3423
3424 for (i = 0; i < blk_per_bucket; i++) {
3425 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3426 ocfs2_journal_dirty(handle, t_bhs[i]);
3427 }
3428
3429out:
3430 if (s_bhs) {
3431 for (i = 0; i < blk_per_bucket; i++)
3432 brelse(s_bhs[i]);
3433 }
3434 kfree(s_bhs);
3435
3436 if (t_bhs) {
3437 for (i = 0; i < blk_per_bucket; i++)
3438 brelse(t_bhs[i]);
3439 }
3440 kfree(t_bhs);
3441
3442 return ret;
3443}
3444
3445/*
3446 * Copy one xattr cluster from src_blk to to_blk.
3447 * The to_blk will become the first bucket header of the cluster, so its
3448 * xh_num_buckets will be initialized as the bucket num in the cluster.
3449 */
3450static int ocfs2_cp_xattr_cluster(struct inode *inode,
3451 handle_t *handle,
3452 struct buffer_head *first_bh,
3453 u64 src_blk,
3454 u64 to_blk,
3455 u32 *first_hash)
3456{
3457 int i, ret, credits;
3458 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3459 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3460 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3461 struct buffer_head *bh = NULL;
3462 struct ocfs2_xattr_header *xh;
3463 u64 to_blk_start = to_blk;
3464
de29c085
MF
3465 mlog(0, "cp xattrs from cluster %llu to %llu\n",
3466 (unsigned long long)src_blk, (unsigned long long)to_blk);
01225596
TM
3467
3468 /*
3469 * We need to update the new cluster and 1 more for the update of
3470 * the 1st bucket of the previous extent rec.
3471 */
3472 credits = bpc + 1;
3473 ret = ocfs2_extend_trans(handle, credits);
3474 if (ret) {
3475 mlog_errno(ret);
3476 goto out;
3477 }
3478
3479 ret = ocfs2_journal_access(handle, inode, first_bh,
3480 OCFS2_JOURNAL_ACCESS_WRITE);
3481 if (ret) {
3482 mlog_errno(ret);
3483 goto out;
3484 }
3485
3486 for (i = 0; i < num_buckets; i++) {
3487 ret = ocfs2_cp_xattr_bucket(inode, handle,
3488 src_blk, to_blk, 1);
3489 if (ret) {
3490 mlog_errno(ret);
3491 goto out;
3492 }
3493
3494 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3495 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3496 }
3497
3498 /* update the old bucket header. */
3499 xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3500 le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3501
3502 ocfs2_journal_dirty(handle, first_bh);
3503
3504 /* update the new bucket header. */
0fcaa56a 3505 ret = ocfs2_read_block(inode, to_blk_start, &bh);
01225596
TM
3506 if (ret < 0) {
3507 mlog_errno(ret);
3508 goto out;
3509 }
3510
3511 ret = ocfs2_journal_access(handle, inode, bh,
3512 OCFS2_JOURNAL_ACCESS_WRITE);
3513 if (ret) {
3514 mlog_errno(ret);
3515 goto out;
3516 }
3517
3518 xh = (struct ocfs2_xattr_header *)bh->b_data;
3519 xh->xh_num_buckets = cpu_to_le16(num_buckets);
3520
3521 ocfs2_journal_dirty(handle, bh);
3522
3523 if (first_hash)
3524 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3525out:
3526 brelse(bh);
3527 return ret;
3528}
3529
3530/*
80bcaf34 3531 * Move some xattrs in this cluster to the new cluster.
01225596
TM
3532 * This function should only be called when bucket size == cluster size.
3533 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3534 */
80bcaf34
TM
3535static int ocfs2_divide_xattr_cluster(struct inode *inode,
3536 handle_t *handle,
3537 u64 prev_blk,
3538 u64 new_blk,
3539 u32 *first_hash)
01225596
TM
3540{
3541 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3542 int ret, credits = 2 * blk_per_bucket;
3543
3544 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3545
3546 ret = ocfs2_extend_trans(handle, credits);
3547 if (ret) {
3548 mlog_errno(ret);
3549 return ret;
3550 }
3551
3552 /* Move half of the xattr in start_blk to the next bucket. */
80bcaf34
TM
3553 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3554 new_blk, first_hash, 1);
01225596
TM
3555}
3556
3557/*
3558 * Move some xattrs from the old cluster to the new one since they are not
3559 * contiguous in ocfs2 xattr tree.
3560 *
3561 * new_blk starts a new separate cluster, and we will move some xattrs from
3562 * prev_blk to it. v_start will be set as the first name hash value in this
3563 * new cluster so that it can be used as e_cpos during tree insertion and
3564 * don't collide with our original b-tree operations. first_bh and header_bh
3565 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3566 * to extend the insert bucket.
3567 *
3568 * The problem is how much xattr should we move to the new one and when should
3569 * we update first_bh and header_bh?
3570 * 1. If cluster size > bucket size, that means the previous cluster has more
3571 * than 1 bucket, so just move half nums of bucket into the new cluster and
3572 * update the first_bh and header_bh if the insert bucket has been moved
3573 * to the new cluster.
3574 * 2. If cluster_size == bucket_size:
3575 * a) If the previous extent rec has more than one cluster and the insert
3576 * place isn't in the last cluster, copy the entire last cluster to the
3577 * new one. This time, we don't need to upate the first_bh and header_bh
3578 * since they will not be moved into the new cluster.
3579 * b) Otherwise, move the bottom half of the xattrs in the last cluster into
3580 * the new one. And we set the extend flag to zero if the insert place is
3581 * moved into the new allocated cluster since no extend is needed.
3582 */
3583static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3584 handle_t *handle,
3585 struct buffer_head **first_bh,
3586 struct buffer_head **header_bh,
3587 u64 new_blk,
3588 u64 prev_blk,
3589 u32 prev_clusters,
3590 u32 *v_start,
3591 int *extend)
3592{
3593 int ret = 0;
3594 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3595
3596 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
de29c085
MF
3597 (unsigned long long)prev_blk, prev_clusters,
3598 (unsigned long long)new_blk);
01225596
TM
3599
3600 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3601 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3602 handle,
3603 first_bh,
3604 header_bh,
3605 new_blk,
3606 prev_blk,
3607 prev_clusters,
3608 v_start);
3609 else {
3610 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3611
3612 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3613 ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3614 last_blk, new_blk,
3615 v_start);
3616 else {
80bcaf34
TM
3617 ret = ocfs2_divide_xattr_cluster(inode, handle,
3618 last_blk, new_blk,
3619 v_start);
01225596
TM
3620
3621 if ((*header_bh)->b_blocknr == last_blk && extend)
3622 *extend = 0;
3623 }
3624 }
3625
3626 return ret;
3627}
3628
3629/*
3630 * Add a new cluster for xattr storage.
3631 *
3632 * If the new cluster is contiguous with the previous one, it will be
3633 * appended to the same extent record, and num_clusters will be updated.
3634 * If not, we will insert a new extent for it and move some xattrs in
3635 * the last cluster into the new allocated one.
3636 * We also need to limit the maximum size of a btree leaf, otherwise we'll
3637 * lose the benefits of hashing because we'll have to search large leaves.
3638 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3639 * if it's bigger).
3640 *
3641 * first_bh is the first block of the previous extent rec and header_bh
3642 * indicates the bucket we will insert the new xattrs. They will be updated
3643 * when the header_bh is moved into the new cluster.
3644 */
3645static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3646 struct buffer_head *root_bh,
3647 struct buffer_head **first_bh,
3648 struct buffer_head **header_bh,
3649 u32 *num_clusters,
3650 u32 prev_cpos,
3651 u64 prev_blkno,
3652 int *extend)
3653{
3654 int ret, credits;
3655 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3656 u32 prev_clusters = *num_clusters;
3657 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3658 u64 block;
3659 handle_t *handle = NULL;
3660 struct ocfs2_alloc_context *data_ac = NULL;
3661 struct ocfs2_alloc_context *meta_ac = NULL;
3662 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
f99b9b7c 3663 struct ocfs2_extent_tree et;
01225596
TM
3664
3665 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3666 "previous xattr blkno = %llu\n",
3667 (unsigned long long)OCFS2_I(inode)->ip_blkno,
de29c085 3668 prev_cpos, (unsigned long long)prev_blkno);
01225596 3669
8d6220d6 3670 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
f99b9b7c
JB
3671
3672 ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
3673 &data_ac, &meta_ac);
01225596
TM
3674 if (ret) {
3675 mlog_errno(ret);
3676 goto leave;
3677 }
3678
f99b9b7c
JB
3679 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
3680 clusters_to_add);
01225596
TM
3681 handle = ocfs2_start_trans(osb, credits);
3682 if (IS_ERR(handle)) {
3683 ret = PTR_ERR(handle);
3684 handle = NULL;
3685 mlog_errno(ret);
3686 goto leave;
3687 }
3688
3689 ret = ocfs2_journal_access(handle, inode, root_bh,
3690 OCFS2_JOURNAL_ACCESS_WRITE);
3691 if (ret < 0) {
3692 mlog_errno(ret);
3693 goto leave;
3694 }
3695
3696 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
3697 clusters_to_add, &bit_off, &num_bits);
3698 if (ret < 0) {
3699 if (ret != -ENOSPC)
3700 mlog_errno(ret);
3701 goto leave;
3702 }
3703
3704 BUG_ON(num_bits > clusters_to_add);
3705
3706 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
3707 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3708 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3709
3710 if (prev_blkno + prev_clusters * bpc == block &&
3711 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3712 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3713 /*
3714 * If this cluster is contiguous with the old one and
3715 * adding this new cluster, we don't surpass the limit of
3716 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
3717 * initialized and used like other buckets in the previous
3718 * cluster.
3719 * So add it as a contiguous one. The caller will handle
3720 * its init process.
3721 */
3722 v_start = prev_cpos + prev_clusters;
3723 *num_clusters = prev_clusters + num_bits;
3724 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
3725 num_bits);
3726 } else {
3727 ret = ocfs2_adjust_xattr_cross_cluster(inode,
3728 handle,
3729 first_bh,
3730 header_bh,
3731 block,
3732 prev_blkno,
3733 prev_clusters,
3734 &v_start,
3735 extend);
3736 if (ret) {
3737 mlog_errno(ret);
3738 goto leave;
3739 }
3740 }
3741
28b8ca0b
TM
3742 if (handle->h_buffer_credits < credits) {
3743 /*
3744 * The journal has been restarted before, and don't
3745 * have enough space for the insertion, so extend it
3746 * here.
3747 */
3748 ret = ocfs2_extend_trans(handle, credits);
3749 if (ret) {
3750 mlog_errno(ret);
3751 goto leave;
3752 }
3753 }
01225596 3754 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
de29c085 3755 num_bits, (unsigned long long)block, v_start);
f99b9b7c
JB
3756 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3757 num_bits, 0, meta_ac);
01225596
TM
3758 if (ret < 0) {
3759 mlog_errno(ret);
3760 goto leave;
3761 }
3762
3763 ret = ocfs2_journal_dirty(handle, root_bh);
3764 if (ret < 0) {
3765 mlog_errno(ret);
3766 goto leave;
3767 }
3768
3769leave:
3770 if (handle)
3771 ocfs2_commit_trans(osb, handle);
3772 if (data_ac)
3773 ocfs2_free_alloc_context(data_ac);
3774 if (meta_ac)
3775 ocfs2_free_alloc_context(meta_ac);
3776
3777 return ret;
3778}
3779
3780/*
3781 * Extend a new xattr bucket and move xattrs to the end one by one until
3782 * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3783 */
3784static int ocfs2_extend_xattr_bucket(struct inode *inode,
3785 struct buffer_head *first_bh,
3786 struct buffer_head *start_bh,
3787 u32 num_clusters)
3788{
3789 int ret, credits;
3790 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3791 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3792 u64 start_blk = start_bh->b_blocknr, end_blk;
3793 u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3794 handle_t *handle;
3795 struct ocfs2_xattr_header *first_xh =
3796 (struct ocfs2_xattr_header *)first_bh->b_data;
3797 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3798
3799 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
de29c085 3800 "from %llu, len = %u\n", (unsigned long long)start_blk,
01225596
TM
3801 (unsigned long long)first_bh->b_blocknr, num_clusters);
3802
3803 BUG_ON(bucket >= num_buckets);
3804
3805 end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3806
3807 /*
3808 * We will touch all the buckets after the start_bh(include it).
3809 * Add one more bucket and modify the first_bh.
3810 */
3811 credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3812 handle = ocfs2_start_trans(osb, credits);
3813 if (IS_ERR(handle)) {
3814 ret = PTR_ERR(handle);
3815 handle = NULL;
3816 mlog_errno(ret);
3817 goto out;
3818 }
3819
3820 ret = ocfs2_journal_access(handle, inode, first_bh,
3821 OCFS2_JOURNAL_ACCESS_WRITE);
3822 if (ret) {
3823 mlog_errno(ret);
3824 goto commit;
3825 }
3826
3827 while (end_blk != start_blk) {
3828 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3829 end_blk + blk_per_bucket, 0);
3830 if (ret)
3831 goto commit;
3832 end_blk -= blk_per_bucket;
3833 }
3834
3835 /* Move half of the xattr in start_blk to the next bucket. */
80bcaf34
TM
3836 ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
3837 start_blk + blk_per_bucket, NULL, 0);
01225596
TM
3838
3839 le16_add_cpu(&first_xh->xh_num_buckets, 1);
3840 ocfs2_journal_dirty(handle, first_bh);
3841
3842commit:
3843 ocfs2_commit_trans(osb, handle);
3844out:
3845 return ret;
3846}
3847
3848/*
3849 * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3850 * xb_bh is the ocfs2_xattr_block.
3851 * We will move all the buckets starting from header_bh to the next place. As
3852 * for this one, half num of its xattrs will be moved to the next one.
3853 *
3854 * We will allocate a new cluster if current cluster is full and adjust
3855 * header_bh and first_bh if the insert place is moved to the new cluster.
3856 */
3857static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3858 struct buffer_head *xb_bh,
3859 struct buffer_head *header_bh)
3860{
3861 struct ocfs2_xattr_header *first_xh = NULL;
3862 struct buffer_head *first_bh = NULL;
3863 struct ocfs2_xattr_block *xb =
3864 (struct ocfs2_xattr_block *)xb_bh->b_data;
3865 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3866 struct ocfs2_extent_list *el = &xb_root->xt_list;
3867 struct ocfs2_xattr_header *xh =
3868 (struct ocfs2_xattr_header *)header_bh->b_data;
3869 u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3870 struct super_block *sb = inode->i_sb;
3871 struct ocfs2_super *osb = OCFS2_SB(sb);
3872 int ret, num_buckets, extend = 1;
3873 u64 p_blkno;
3874 u32 e_cpos, num_clusters;
3875
3876 mlog(0, "Add new xattr bucket starting form %llu\n",
3877 (unsigned long long)header_bh->b_blocknr);
3878
3879 /*
3880 * Add refrence for header_bh here because it may be
3881 * changed in ocfs2_add_new_xattr_cluster and we need
3882 * to free it in the end.
3883 */
3884 get_bh(header_bh);
3885
3886 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3887 &num_clusters, el);
3888 if (ret) {
3889 mlog_errno(ret);
3890 goto out;
3891 }
3892
0fcaa56a 3893 ret = ocfs2_read_block(inode, p_blkno, &first_bh);
01225596
TM
3894 if (ret) {
3895 mlog_errno(ret);
3896 goto out;
3897 }
3898
3899 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3900 first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3901
3902 if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3903 ret = ocfs2_add_new_xattr_cluster(inode,
3904 xb_bh,
3905 &first_bh,
3906 &header_bh,
3907 &num_clusters,
3908 e_cpos,
3909 p_blkno,
3910 &extend);
3911 if (ret) {
3912 mlog_errno(ret);
3913 goto out;
3914 }
3915 }
3916
3917 if (extend)
3918 ret = ocfs2_extend_xattr_bucket(inode,
3919 first_bh,
3920 header_bh,
3921 num_clusters);
3922 if (ret)
3923 mlog_errno(ret);
3924out:
3925 brelse(first_bh);
3926 brelse(header_bh);
3927 return ret;
3928}
3929
3930static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3931 struct ocfs2_xattr_bucket *bucket,
3932 int offs)
3933{
3934 int block_off = offs >> inode->i_sb->s_blocksize_bits;
3935
3936 offs = offs % inode->i_sb->s_blocksize;
51def39f 3937 return bucket_block(bucket, block_off) + offs;
01225596
TM
3938}
3939
3940/*
3941 * Handle the normal xattr set, including replace, delete and new.
01225596
TM
3942 *
3943 * Note: "local" indicates the real data's locality. So we can't
3944 * just its bucket locality by its length.
3945 */
3946static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3947 struct ocfs2_xattr_info *xi,
3948 struct ocfs2_xattr_search *xs,
3949 u32 name_hash,
5a095611 3950 int local)
01225596
TM
3951{
3952 struct ocfs2_xattr_entry *last, *xe;
3953 int name_len = strlen(xi->name);
3954 struct ocfs2_xattr_header *xh = xs->header;
3955 u16 count = le16_to_cpu(xh->xh_count), start;
3956 size_t blocksize = inode->i_sb->s_blocksize;
3957 char *val;
3958 size_t offs, size, new_size;
3959
3960 last = &xh->xh_entries[count];
3961 if (!xs->not_found) {
3962 xe = xs->here;
3963 offs = le16_to_cpu(xe->xe_name_offset);
3964 if (ocfs2_xattr_is_local(xe))
3965 size = OCFS2_XATTR_SIZE(name_len) +
3966 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3967 else
3968 size = OCFS2_XATTR_SIZE(name_len) +
3969 OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
3970
3971 /*
3972 * If the new value will be stored outside, xi->value has been
3973 * initalized as an empty ocfs2_xattr_value_root, and the same
3974 * goes with xi->value_len, so we can set new_size safely here.
3975 * See ocfs2_xattr_set_in_bucket.
3976 */
3977 new_size = OCFS2_XATTR_SIZE(name_len) +
3978 OCFS2_XATTR_SIZE(xi->value_len);
3979
3980 le16_add_cpu(&xh->xh_name_value_len, -size);
3981 if (xi->value) {
3982 if (new_size > size)
3983 goto set_new_name_value;
3984
3985 /* Now replace the old value with new one. */
3986 if (local)
3987 xe->xe_value_size = cpu_to_le64(xi->value_len);
3988 else
3989 xe->xe_value_size = 0;
3990
3991 val = ocfs2_xattr_bucket_get_val(inode,
3992 &xs->bucket, offs);
3993 memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3994 size - OCFS2_XATTR_SIZE(name_len));
3995 if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
3996 memcpy(val + OCFS2_XATTR_SIZE(name_len),
3997 xi->value, xi->value_len);
3998
3999 le16_add_cpu(&xh->xh_name_value_len, new_size);
4000 ocfs2_xattr_set_local(xe, local);
4001 return;
4002 } else {
5a095611
TM
4003 /*
4004 * Remove the old entry if there is more than one.
4005 * We don't remove the last entry so that we can
4006 * use it to indicate the hash value of the empty
4007 * bucket.
4008 */
01225596 4009 last -= 1;
01225596 4010 le16_add_cpu(&xh->xh_count, -1);
5a095611
TM
4011 if (xh->xh_count) {
4012 memmove(xe, xe + 1,
4013 (void *)last - (void *)xe);
4014 memset(last, 0,
4015 sizeof(struct ocfs2_xattr_entry));
4016 } else
4017 xh->xh_free_start =
4018 cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4019
01225596
TM
4020 return;
4021 }
4022 } else {
4023 /* find a new entry for insert. */
4024 int low = 0, high = count - 1, tmp;
4025 struct ocfs2_xattr_entry *tmp_xe;
4026
5a095611 4027 while (low <= high && count) {
01225596
TM
4028 tmp = (low + high) / 2;
4029 tmp_xe = &xh->xh_entries[tmp];
4030
4031 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4032 low = tmp + 1;
4033 else if (name_hash <
4034 le32_to_cpu(tmp_xe->xe_name_hash))
4035 high = tmp - 1;
06b240d8
TM
4036 else {
4037 low = tmp;
01225596 4038 break;
06b240d8 4039 }
01225596
TM
4040 }
4041
4042 xe = &xh->xh_entries[low];
4043 if (low != count)
4044 memmove(xe + 1, xe, (void *)last - (void *)xe);
4045
4046 le16_add_cpu(&xh->xh_count, 1);
4047 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4048 xe->xe_name_hash = cpu_to_le32(name_hash);
4049 xe->xe_name_len = name_len;
4050 ocfs2_xattr_set_type(xe, xi->name_index);
4051 }
4052
4053set_new_name_value:
4054 /* Insert the new name+value. */
4055 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4056
4057 /*
4058 * We must make sure that the name/value pair
4059 * exists in the same block.
4060 */
4061 offs = le16_to_cpu(xh->xh_free_start);
4062 start = offs - size;
4063
4064 if (start >> inode->i_sb->s_blocksize_bits !=
4065 (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4066 offs = offs - offs % blocksize;
4067 xh->xh_free_start = cpu_to_le16(offs);
4068 }
4069
4070 val = ocfs2_xattr_bucket_get_val(inode,
4071 &xs->bucket, offs - size);
4072 xe->xe_name_offset = cpu_to_le16(offs - size);
4073
4074 memset(val, 0, size);
4075 memcpy(val, xi->name, name_len);
4076 memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4077
4078 xe->xe_value_size = cpu_to_le64(xi->value_len);
4079 ocfs2_xattr_set_local(xe, local);
4080 xs->here = xe;
4081 le16_add_cpu(&xh->xh_free_start, -size);
4082 le16_add_cpu(&xh->xh_name_value_len, size);
4083
4084 return;
4085}
4086
4087static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4088 handle_t *handle,
4089 struct ocfs2_xattr_search *xs,
4090 struct buffer_head **bhs,
4091 u16 bh_num)
4092{
4093 int ret = 0, off, block_off;
4094 struct ocfs2_xattr_entry *xe = xs->here;
4095
4096 /*
4097 * First calculate all the blocks we should journal_access
4098 * and journal_dirty. The first block should always be touched.
4099 */
4100 ret = ocfs2_journal_dirty(handle, bhs[0]);
4101 if (ret)
4102 mlog_errno(ret);
4103
4104 /* calc the data. */
4105 off = le16_to_cpu(xe->xe_name_offset);
4106 block_off = off >> inode->i_sb->s_blocksize_bits;
4107 ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4108 if (ret)
4109 mlog_errno(ret);
4110
4111 return ret;
4112}
4113
4114/*
4115 * Set the xattr entry in the specified bucket.
4116 * The bucket is indicated by xs->bucket and it should have the enough
4117 * space for the xattr insertion.
4118 */
4119static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4120 struct ocfs2_xattr_info *xi,
4121 struct ocfs2_xattr_search *xs,
4122 u32 name_hash,
5a095611 4123 int local)
01225596
TM
4124{
4125 int i, ret;
4126 handle_t *handle = NULL;
4127 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4128 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4129
ff1ec20e
MF
4130 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4131 (unsigned long)xi->value_len, xi->name_index,
9c7759aa 4132 (unsigned long long)bucket_blkno(&xs->bucket));
01225596 4133
4ac6032d 4134 if (!xs->bucket.bu_bhs[1]) {
31d33073 4135 ret = ocfs2_read_blocks(inode,
9c7759aa 4136 bucket_blkno(&xs->bucket) + 1,
4ac6032d 4137 blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
1efd47f8 4138 0);
01225596
TM
4139 if (ret) {
4140 mlog_errno(ret);
4141 goto out;
4142 }
4143 }
4144
4145 handle = ocfs2_start_trans(osb, blk_per_bucket);
4146 if (IS_ERR(handle)) {
4147 ret = PTR_ERR(handle);
4148 handle = NULL;
4149 mlog_errno(ret);
4150 goto out;
4151 }
4152
4153 for (i = 0; i < blk_per_bucket; i++) {
4ac6032d 4154 ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[i],
01225596
TM
4155 OCFS2_JOURNAL_ACCESS_WRITE);
4156 if (ret < 0) {
4157 mlog_errno(ret);
4158 goto out;
4159 }
4160 }
4161
5a095611 4162 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
01225596
TM
4163
4164 /*Only dirty the blocks we have touched in set xattr. */
4165 ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4ac6032d 4166 xs->bucket.bu_bhs, blk_per_bucket);
01225596
TM
4167 if (ret)
4168 mlog_errno(ret);
4169out:
4170 ocfs2_commit_trans(osb, handle);
4171
4172 return ret;
4173}
4174
4175static int ocfs2_xattr_value_update_size(struct inode *inode,
4176 struct buffer_head *xe_bh,
4177 struct ocfs2_xattr_entry *xe,
4178 u64 new_size)
4179{
4180 int ret;
4181 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4182 handle_t *handle = NULL;
4183
4184 handle = ocfs2_start_trans(osb, 1);
d3264799 4185 if (IS_ERR(handle)) {
01225596
TM
4186 ret = -ENOMEM;
4187 mlog_errno(ret);
4188 goto out;
4189 }
4190
4191 ret = ocfs2_journal_access(handle, inode, xe_bh,
4192 OCFS2_JOURNAL_ACCESS_WRITE);
4193 if (ret < 0) {
4194 mlog_errno(ret);
4195 goto out_commit;
4196 }
4197
4198 xe->xe_value_size = cpu_to_le64(new_size);
4199
4200 ret = ocfs2_journal_dirty(handle, xe_bh);
4201 if (ret < 0)
4202 mlog_errno(ret);
4203
4204out_commit:
4205 ocfs2_commit_trans(osb, handle);
4206out:
4207 return ret;
4208}
4209
4210/*
4211 * Truncate the specified xe_off entry in xattr bucket.
4212 * bucket is indicated by header_bh and len is the new length.
4213 * Both the ocfs2_xattr_value_root and the entry will be updated here.
4214 *
4215 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4216 */
4217static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4218 struct buffer_head *header_bh,
4219 int xe_off,
4220 int len)
4221{
4222 int ret, offset;
4223 u64 value_blk;
4224 struct buffer_head *value_bh = NULL;
4225 struct ocfs2_xattr_value_root *xv;
4226 struct ocfs2_xattr_entry *xe;
4227 struct ocfs2_xattr_header *xh =
4228 (struct ocfs2_xattr_header *)header_bh->b_data;
4229 size_t blocksize = inode->i_sb->s_blocksize;
4230
4231 xe = &xh->xh_entries[xe_off];
4232
4233 BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4234
4235 offset = le16_to_cpu(xe->xe_name_offset) +
4236 OCFS2_XATTR_SIZE(xe->xe_name_len);
4237
4238 value_blk = offset / blocksize;
4239
4240 /* We don't allow ocfs2_xattr_value to be stored in different block. */
4241 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4242 value_blk += header_bh->b_blocknr;
4243
0fcaa56a 4244 ret = ocfs2_read_block(inode, value_blk, &value_bh);
01225596
TM
4245 if (ret) {
4246 mlog_errno(ret);
4247 goto out;
4248 }
4249
4250 xv = (struct ocfs2_xattr_value_root *)
4251 (value_bh->b_data + offset % blocksize);
4252
4253 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4254 xe_off, (unsigned long long)header_bh->b_blocknr, len);
4255 ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4256 if (ret) {
4257 mlog_errno(ret);
4258 goto out;
4259 }
4260
4261 ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4262 if (ret) {
4263 mlog_errno(ret);
4264 goto out;
4265 }
4266
4267out:
4268 brelse(value_bh);
4269 return ret;
4270}
4271
4272static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4273 struct ocfs2_xattr_search *xs,
4274 int len)
4275{
4276 int ret, offset;
4277 struct ocfs2_xattr_entry *xe = xs->here;
4278 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4279
4ac6032d 4280 BUG_ON(!xs->bucket.bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
01225596
TM
4281
4282 offset = xe - xh->xh_entries;
4ac6032d 4283 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bu_bhs[0],
01225596
TM
4284 offset, len);
4285 if (ret)
4286 mlog_errno(ret);
4287
4288 return ret;
4289}
4290
4291static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4292 struct ocfs2_xattr_search *xs,
4293 char *val,
4294 int value_len)
4295{
4296 int offset;
4297 struct ocfs2_xattr_value_root *xv;
4298 struct ocfs2_xattr_entry *xe = xs->here;
4299
4300 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4301
4302 offset = le16_to_cpu(xe->xe_name_offset) +
4303 OCFS2_XATTR_SIZE(xe->xe_name_len);
4304
4305 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4306
4307 return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4308}
4309
01225596
TM
4310static int ocfs2_rm_xattr_cluster(struct inode *inode,
4311 struct buffer_head *root_bh,
4312 u64 blkno,
4313 u32 cpos,
4314 u32 len)
4315{
4316 int ret;
4317 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4318 struct inode *tl_inode = osb->osb_tl_inode;
4319 handle_t *handle;
4320 struct ocfs2_xattr_block *xb =
4321 (struct ocfs2_xattr_block *)root_bh->b_data;
01225596
TM
4322 struct ocfs2_alloc_context *meta_ac = NULL;
4323 struct ocfs2_cached_dealloc_ctxt dealloc;
f99b9b7c
JB
4324 struct ocfs2_extent_tree et;
4325
8d6220d6 4326 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
01225596
TM
4327
4328 ocfs2_init_dealloc_ctxt(&dealloc);
4329
4330 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4331 cpos, len, (unsigned long long)blkno);
4332
4333 ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4334
f99b9b7c 4335 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
01225596
TM
4336 if (ret) {
4337 mlog_errno(ret);
4338 return ret;
4339 }
4340
4341 mutex_lock(&tl_inode->i_mutex);
4342
4343 if (ocfs2_truncate_log_needs_flush(osb)) {
4344 ret = __ocfs2_flush_truncate_log(osb);
4345 if (ret < 0) {
4346 mlog_errno(ret);
4347 goto out;
4348 }
4349 }
4350
4351 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
d3264799 4352 if (IS_ERR(handle)) {
01225596
TM
4353 ret = -ENOMEM;
4354 mlog_errno(ret);
4355 goto out;
4356 }
4357
4358 ret = ocfs2_journal_access(handle, inode, root_bh,
4359 OCFS2_JOURNAL_ACCESS_WRITE);
4360 if (ret) {
4361 mlog_errno(ret);
4362 goto out_commit;
4363 }
4364
f99b9b7c
JB
4365 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4366 &dealloc);
01225596
TM
4367 if (ret) {
4368 mlog_errno(ret);
4369 goto out_commit;
4370 }
4371
4372 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4373
4374 ret = ocfs2_journal_dirty(handle, root_bh);
4375 if (ret) {
4376 mlog_errno(ret);
4377 goto out_commit;
4378 }
4379
4380 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4381 if (ret)
4382 mlog_errno(ret);
4383
4384out_commit:
4385 ocfs2_commit_trans(osb, handle);
4386out:
4387 ocfs2_schedule_truncate_log_flush(osb, 1);
4388
4389 mutex_unlock(&tl_inode->i_mutex);
4390
4391 if (meta_ac)
4392 ocfs2_free_alloc_context(meta_ac);
4393
4394 ocfs2_run_deallocs(osb, &dealloc);
4395
4396 return ret;
4397}
4398
01225596
TM
4399static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4400 struct ocfs2_xattr_search *xs)
4401{
4402 handle_t *handle = NULL;
3e632946 4403 struct ocfs2_xattr_header *xh = bucket_xh(&xs->bucket);
01225596
TM
4404 struct ocfs2_xattr_entry *last = &xh->xh_entries[
4405 le16_to_cpu(xh->xh_count) - 1];
4406 int ret = 0;
4407
4408 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4409 if (IS_ERR(handle)) {
4410 ret = PTR_ERR(handle);
4411 mlog_errno(ret);
4412 return;
4413 }
4414
4ac6032d 4415 ret = ocfs2_journal_access(handle, inode, xs->bucket.bu_bhs[0],
01225596
TM
4416 OCFS2_JOURNAL_ACCESS_WRITE);
4417 if (ret) {
4418 mlog_errno(ret);
4419 goto out_commit;
4420 }
4421
4422 /* Remove the old entry. */
4423 memmove(xs->here, xs->here + 1,
4424 (void *)last - (void *)xs->here);
4425 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4426 le16_add_cpu(&xh->xh_count, -1);
4427
4ac6032d 4428 ret = ocfs2_journal_dirty(handle, xs->bucket.bu_bhs[0]);
01225596
TM
4429 if (ret < 0)
4430 mlog_errno(ret);
4431out_commit:
4432 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4433}
4434
4435/*
4436 * Set the xattr name/value in the bucket specified in xs.
4437 *
4438 * As the new value in xi may be stored in the bucket or in an outside cluster,
4439 * we divide the whole process into 3 steps:
4440 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4441 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4442 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4443 * 4. If the clusters for the new outside value can't be allocated, we need
4444 * to free the xattr we allocated in set.
4445 */
4446static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4447 struct ocfs2_xattr_info *xi,
4448 struct ocfs2_xattr_search *xs)
4449{
5a095611 4450 int ret, local = 1;
01225596
TM
4451 size_t value_len;
4452 char *val = (char *)xi->value;
4453 struct ocfs2_xattr_entry *xe = xs->here;
2057e5c6
TM
4454 u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4455 strlen(xi->name));
01225596
TM
4456
4457 if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4458 /*
4459 * We need to truncate the xattr storage first.
4460 *
4461 * If both the old and new value are stored to
4462 * outside block, we only need to truncate
4463 * the storage and then set the value outside.
4464 *
4465 * If the new value should be stored within block,
4466 * we should free all the outside block first and
4467 * the modification to the xattr block will be done
4468 * by following steps.
4469 */
4470 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4471 value_len = xi->value_len;
4472 else
4473 value_len = 0;
4474
4475 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4476 value_len);
4477 if (ret)
4478 goto out;
4479
4480 if (value_len)
4481 goto set_value_outside;
4482 }
4483
4484 value_len = xi->value_len;
4485 /* So we have to handle the inside block change now. */
4486 if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4487 /*
4488 * If the new value will be stored outside of block,
4489 * initalize a new empty value root and insert it first.
4490 */
4491 local = 0;
4492 xi->value = &def_xv;
4493 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4494 }
4495
5a095611 4496 ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local);
01225596
TM
4497 if (ret) {
4498 mlog_errno(ret);
4499 goto out;
4500 }
4501
5a095611
TM
4502 if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4503 goto out;
01225596 4504
5a095611
TM
4505 /* allocate the space now for the outside block storage. */
4506 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4507 value_len);
4508 if (ret) {
4509 mlog_errno(ret);
4510
4511 if (xs->not_found) {
4512 /*
4513 * We can't allocate enough clusters for outside
4514 * storage and we have allocated xattr already,
4515 * so need to remove it.
4516 */
4517 ocfs2_xattr_bucket_remove_xs(inode, xs);
01225596 4518 }
01225596
TM
4519 goto out;
4520 }
4521
4522set_value_outside:
4523 ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
4524out:
4525 return ret;
4526}
4527
80bcaf34
TM
4528/*
4529 * check whether the xattr bucket is filled up with the same hash value.
4530 * If we want to insert the xattr with the same hash, return -ENOSPC.
4531 * If we want to insert a xattr with different hash value, go ahead
4532 * and ocfs2_divide_xattr_bucket will handle this.
4533 */
01225596 4534static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
80bcaf34
TM
4535 struct ocfs2_xattr_bucket *bucket,
4536 const char *name)
01225596 4537{
3e632946 4538 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
80bcaf34
TM
4539 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4540
4541 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4542 return 0;
01225596
TM
4543
4544 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4545 xh->xh_entries[0].xe_name_hash) {
4546 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4547 "hash = %u\n",
9c7759aa 4548 (unsigned long long)bucket_blkno(bucket),
01225596
TM
4549 le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4550 return -ENOSPC;
4551 }
4552
4553 return 0;
4554}
4555
4556static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4557 struct ocfs2_xattr_info *xi,
4558 struct ocfs2_xattr_search *xs)
4559{
4560 struct ocfs2_xattr_header *xh;
4561 struct ocfs2_xattr_entry *xe;
4562 u16 count, header_size, xh_free_start;
6dde41d9 4563 int free, max_free, need, old;
01225596
TM
4564 size_t value_size = 0, name_len = strlen(xi->name);
4565 size_t blocksize = inode->i_sb->s_blocksize;
4566 int ret, allocation = 0;
01225596
TM
4567
4568 mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4569
4570try_again:
4571 xh = xs->header;
4572 count = le16_to_cpu(xh->xh_count);
4573 xh_free_start = le16_to_cpu(xh->xh_free_start);
4574 header_size = sizeof(struct ocfs2_xattr_header) +
4575 count * sizeof(struct ocfs2_xattr_entry);
4576 max_free = OCFS2_XATTR_BUCKET_SIZE -
4577 le16_to_cpu(xh->xh_name_value_len) - header_size;
4578
4579 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4580 "of %u which exceed block size\n",
9c7759aa 4581 (unsigned long long)bucket_blkno(&xs->bucket),
01225596
TM
4582 header_size);
4583
4584 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4585 value_size = OCFS2_XATTR_ROOT_SIZE;
4586 else if (xi->value)
4587 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4588
4589 if (xs->not_found)
4590 need = sizeof(struct ocfs2_xattr_entry) +
4591 OCFS2_XATTR_SIZE(name_len) + value_size;
4592 else {
4593 need = value_size + OCFS2_XATTR_SIZE(name_len);
4594
4595 /*
4596 * We only replace the old value if the new length is smaller
4597 * than the old one. Otherwise we will allocate new space in the
4598 * bucket to store it.
4599 */
4600 xe = xs->here;
4601 if (ocfs2_xattr_is_local(xe))
4602 old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4603 else
4604 old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4605
4606 if (old >= value_size)
4607 need = 0;
4608 }
4609
4610 free = xh_free_start - header_size;
4611 /*
4612 * We need to make sure the new name/value pair
4613 * can exist in the same block.
4614 */
4615 if (xh_free_start % blocksize < need)
4616 free -= xh_free_start % blocksize;
4617
4618 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4619 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4620 " %u\n", xs->not_found,
9c7759aa 4621 (unsigned long long)bucket_blkno(&xs->bucket),
01225596
TM
4622 free, need, max_free, le16_to_cpu(xh->xh_free_start),
4623 le16_to_cpu(xh->xh_name_value_len));
4624
4625 if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4626 if (need <= max_free &&
4627 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4628 /*
4629 * We can create the space by defragment. Since only the
4630 * name/value will be moved, the xe shouldn't be changed
4631 * in xs.
4632 */
4633 ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
4634 if (ret) {
4635 mlog_errno(ret);
4636 goto out;
4637 }
4638
4639 xh_free_start = le16_to_cpu(xh->xh_free_start);
4640 free = xh_free_start - header_size;
4641 if (xh_free_start % blocksize < need)
4642 free -= xh_free_start % blocksize;
4643
4644 if (free >= need)
4645 goto xattr_set;
4646
4647 mlog(0, "Can't get enough space for xattr insert by "
4648 "defragment. Need %u bytes, but we have %d, so "
4649 "allocate new bucket for it.\n", need, free);
4650 }
4651
4652 /*
4653 * We have to add new buckets or clusters and one
4654 * allocation should leave us enough space for insert.
4655 */
4656 BUG_ON(allocation);
4657
4658 /*
4659 * We do not allow for overlapping ranges between buckets. And
4660 * the maximum number of collisions we will allow for then is
4661 * one bucket's worth, so check it here whether we need to
4662 * add a new bucket for the insert.
4663 */
80bcaf34
TM
4664 ret = ocfs2_check_xattr_bucket_collision(inode,
4665 &xs->bucket,
4666 xi->name);
01225596
TM
4667 if (ret) {
4668 mlog_errno(ret);
4669 goto out;
4670 }
4671
4672 ret = ocfs2_add_new_xattr_bucket(inode,
4673 xs->xattr_bh,
4ac6032d 4674 xs->bucket.bu_bhs[0]);
01225596
TM
4675 if (ret) {
4676 mlog_errno(ret);
4677 goto out;
4678 }
4679
6dde41d9 4680 ocfs2_xattr_bucket_relse(inode, &xs->bucket);
01225596
TM
4681 memset(&xs->bucket, 0, sizeof(xs->bucket));
4682
4683 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4684 xi->name_index,
4685 xi->name, xs);
4686 if (ret && ret != -ENODATA)
4687 goto out;
4688 xs->not_found = ret;
4689 allocation = 1;
4690 goto try_again;
4691 }
4692
4693xattr_set:
4694 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
4695out:
4696 mlog_exit(ret);
4697 return ret;
4698}
a3944256
TM
4699
4700static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4701 struct ocfs2_xattr_bucket *bucket,
4702 void *para)
4703{
4704 int ret = 0;
3e632946 4705 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
a3944256
TM
4706 u16 i;
4707 struct ocfs2_xattr_entry *xe;
4708
4709 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4710 xe = &xh->xh_entries[i];
4711 if (ocfs2_xattr_is_local(xe))
4712 continue;
4713
4714 ret = ocfs2_xattr_bucket_value_truncate(inode,
4ac6032d 4715 bucket->bu_bhs[0],
a3944256
TM
4716 i, 0);
4717 if (ret) {
4718 mlog_errno(ret);
4719 break;
4720 }
4721 }
4722
4723 return ret;
4724}
4725
4726static int ocfs2_delete_xattr_index_block(struct inode *inode,
4727 struct buffer_head *xb_bh)
4728{
4729 struct ocfs2_xattr_block *xb =
4730 (struct ocfs2_xattr_block *)xb_bh->b_data;
4731 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4732 int ret = 0;
4733 u32 name_hash = UINT_MAX, e_cpos, num_clusters;
4734 u64 p_blkno;
4735
4736 if (le16_to_cpu(el->l_next_free_rec) == 0)
4737 return 0;
4738
4739 while (name_hash > 0) {
4740 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4741 &e_cpos, &num_clusters, el);
4742 if (ret) {
4743 mlog_errno(ret);
4744 goto out;
4745 }
4746
4747 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
4748 ocfs2_delete_xattr_in_bucket,
4749 NULL);
4750 if (ret) {
4751 mlog_errno(ret);
4752 goto out;
4753 }
4754
4755 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
4756 p_blkno, e_cpos, num_clusters);
4757 if (ret) {
4758 mlog_errno(ret);
4759 break;
4760 }
4761
4762 if (e_cpos == 0)
4763 break;
4764
4765 name_hash = e_cpos - 1;
4766 }
4767
4768out:
4769 return ret;
4770}
99219aea
MF
4771
4772/*
4773 * 'trusted' attributes support
4774 */
99219aea
MF
4775static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
4776 size_t list_size, const char *name,
4777 size_t name_len)
4778{
ceb1eba3 4779 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
99219aea
MF
4780 const size_t total_len = prefix_len + name_len + 1;
4781
4782 if (list && total_len <= list_size) {
4783 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
4784 memcpy(list + prefix_len, name, name_len);
4785 list[prefix_len + name_len] = '\0';
4786 }
4787 return total_len;
4788}
4789
4790static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
4791 void *buffer, size_t size)
4792{
4793 if (strcmp(name, "") == 0)
4794 return -EINVAL;
4795 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
4796 buffer, size);
4797}
4798
4799static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
4800 const void *value, size_t size, int flags)
4801{
4802 if (strcmp(name, "") == 0)
4803 return -EINVAL;
4804
4805 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
4806 size, flags);
4807}
4808
4809struct xattr_handler ocfs2_xattr_trusted_handler = {
4810 .prefix = XATTR_TRUSTED_PREFIX,
4811 .list = ocfs2_xattr_trusted_list,
4812 .get = ocfs2_xattr_trusted_get,
4813 .set = ocfs2_xattr_trusted_set,
4814};
4815
99219aea
MF
4816/*
4817 * 'user' attributes support
4818 */
99219aea
MF
4819static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
4820 size_t list_size, const char *name,
4821 size_t name_len)
4822{
ceb1eba3 4823 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
99219aea
MF
4824 const size_t total_len = prefix_len + name_len + 1;
4825 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4826
4827 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4828 return 0;
4829
4830 if (list && total_len <= list_size) {
4831 memcpy(list, XATTR_USER_PREFIX, prefix_len);
4832 memcpy(list + prefix_len, name, name_len);
4833 list[prefix_len + name_len] = '\0';
4834 }
4835 return total_len;
4836}
4837
4838static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
4839 void *buffer, size_t size)
4840{
4841 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4842
4843 if (strcmp(name, "") == 0)
4844 return -EINVAL;
4845 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4846 return -EOPNOTSUPP;
4847 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
4848 buffer, size);
4849}
4850
4851static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
4852 const void *value, size_t size, int flags)
4853{
4854 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4855
4856 if (strcmp(name, "") == 0)
4857 return -EINVAL;
4858 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4859 return -EOPNOTSUPP;
4860
4861 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
4862 size, flags);
4863}
4864
4865struct xattr_handler ocfs2_xattr_user_handler = {
4866 .prefix = XATTR_USER_PREFIX,
4867 .list = ocfs2_xattr_user_list,
4868 .get = ocfs2_xattr_user_get,
4869 .set = ocfs2_xattr_user_set,
4870};