]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/ops_fstype.c
GFS2: Use RCU/hlist_bl based hash for quotas
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / ops_fstype.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
cf45b752 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
b3b94faa
DT
15#include <linux/blkdev.h>
16#include <linux/kthread.h>
afeacc8c 17#include <linux/export.h>
86384605
AD
18#include <linux/namei.h>
19#include <linux/mount.h>
5c676f6d 20#include <linux/gfs2_ondisk.h>
cc632e7f 21#include <linux/quotaops.h>
56aa72d0 22#include <linux/lockdep.h>
7f78e035 23#include <linux/module.h>
b3b94faa
DT
24
25#include "gfs2.h"
5c676f6d 26#include "incore.h"
da6dd40d 27#include "bmap.h"
b3b94faa
DT
28#include "glock.h"
29#include "glops.h"
30#include "inode.h"
b3b94faa
DT
31#include "recovery.h"
32#include "rgrp.h"
33#include "super.h"
b3b94faa 34#include "sys.h"
5c676f6d 35#include "util.h"
bb3b0e3d 36#include "log.h"
9ac1b4d9 37#include "quota.h"
b5289681 38#include "dir.h"
70d4ee94 39#include "meta_io.h"
63997775 40#include "trace_gfs2.h"
b3b94faa
DT
41
42#define DO 0
43#define UNDO 1
44
9b8df98f
SW
/**
 * gfs2_tune_init - Fill a gfs2_tune structure with default values
 * @gt: tune
 *
 */

static void gfs2_tune_init(struct gfs2_tune *gt)
{
	spin_lock_init(&gt->gt_spin);

	gt->gt_quota_warn_period = 10;
	gt->gt_quota_scale_num = 1;
	gt->gt_quota_scale_den = 1;
	gt->gt_new_files_jdata = 0;
	gt->gt_max_readahead = 1 << 18;	/* 256KB default readahead */
	gt->gt_complain_secs = 10;
}
62
b3b94faa
DT
/**
 * init_sbd - allocate and initialise an in-core superblock (gfs2_sbd)
 * @sb: the VFS super block to attach it to
 *
 * Allocates the gfs2_sbd, its per-cpu lock statistics, and initialises
 * every list head, lock, waitqueue and the metadata address space used
 * during the mount.  Returns the new sbd, or NULL on allocation failure.
 */
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
	struct gfs2_sbd *sdp;
	struct address_space *mapping;

	sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
	if (!sdp)
		return NULL;

	sb->s_fs_info = sdp;
	sdp->sd_vfs = sb;
	sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
	if (!sdp->sd_lkstats) {
		kfree(sdp);
		return NULL;
	}

	/* Cleared once a journal id is assigned (mount option or lock module) */
	set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
	gfs2_tune_init(&sdp->sd_tune);

	init_waitqueue_head(&sdp->sd_glock_wait);
	atomic_set(&sdp->sd_glock_disposal, 0);
	init_completion(&sdp->sd_locking_init);
	init_completion(&sdp->sd_wdack);
	spin_lock_init(&sdp->sd_statfs_spin);

	spin_lock_init(&sdp->sd_rindex_spin);
	sdp->sd_rindex_tree.rb_node = NULL;

	INIT_LIST_HEAD(&sdp->sd_jindex_list);
	spin_lock_init(&sdp->sd_jindex_spin);
	mutex_init(&sdp->sd_jindex_mutex);

	INIT_LIST_HEAD(&sdp->sd_quota_list);
	mutex_init(&sdp->sd_quota_mutex);
	mutex_init(&sdp->sd_quota_sync_mutex);
	init_waitqueue_head(&sdp->sd_quota_wait);
	INIT_LIST_HEAD(&sdp->sd_trunc_list);
	spin_lock_init(&sdp->sd_trunc_lock);

	/* Private address space for metadata, backed by the block device */
	mapping = &sdp->sd_aspace;

	address_space_init_once(mapping);
	mapping->a_ops = &gfs2_meta_aops;
	mapping->host = sb->s_bdev->bd_inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	mapping->private_data = NULL;
	mapping->backing_dev_info = sb->s_bdi;
	mapping->writeback_index = 0;

	spin_lock_init(&sdp->sd_log_lock);
	atomic_set(&sdp->sd_log_pinned, 0);
	INIT_LIST_HEAD(&sdp->sd_log_le_buf);
	INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
	INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
	INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
	spin_lock_init(&sdp->sd_ordered_lock);

	init_waitqueue_head(&sdp->sd_log_waitq);
	init_waitqueue_head(&sdp->sd_logd_waitq);
	spin_lock_init(&sdp->sd_ail_lock);
	INIT_LIST_HEAD(&sdp->sd_ail1_list);
	INIT_LIST_HEAD(&sdp->sd_ail2_list);

	init_rwsem(&sdp->sd_log_flush_lock);
	atomic_set(&sdp->sd_log_in_flight, 0);
	init_waitqueue_head(&sdp->sd_log_flush_wait);

	INIT_LIST_HEAD(&sdp->sd_revoke_list);

	return sdp;
}
136
b3b94faa 137
9b8df98f
SW
/**
 * gfs2_check_sb - Check superblock
 * @sdp: the filesystem
 * @silent: Don't print a message if the check fails
 *
 * Checks the version code of the FS is one that we understand how to
 * read and that the sizes of the various on-disk structures have not
 * changed.
 *
 * Returns: 0 if the superblock is valid, -EINVAL otherwise
 */

static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
{
	struct gfs2_sb_host *sb = &sdp->sd_sb;

	if (sb->sb_magic != GFS2_MAGIC ||
	    sb->sb_type != GFS2_METATYPE_SB) {
		if (!silent)
			printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
		return -EINVAL;
	}

	/* If format numbers match exactly, we're done. */

	if (sb->sb_fs_format == GFS2_FORMAT_FS &&
	    sb->sb_multihost_format == GFS2_FORMAT_MULTI)
		return 0;

	fs_warn(sdp, "Unknown on-disk format, unable to mount\n");

	return -EINVAL;
}
170
/* bio completion callback for the raw superblock read: mark the page
 * uptodate on success and unlock it either way so the reader can proceed. */
static void end_bio_io_page(struct bio *bio, int error)
{
	struct page *page = bio->bi_private;

	if (!error)
		SetPageUptodate(page);
	else
		printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
	unlock_page(page);
}
181
/* Convert the on-disk (big-endian) superblock @buf into the host-endian
 * sd_sb, and copy the filesystem UUID into the VFS super_block. */
static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
{
	struct gfs2_sb_host *sb = &sdp->sd_sb;
	struct super_block *s = sdp->sd_vfs;
	const struct gfs2_sb *str = buf;

	sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
	sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
	sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
	sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
	sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
	sb->sb_bsize = be32_to_cpu(str->sb_bsize);
	sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
	sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
	sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
	sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
	sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);

	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
	memcpy(s->s_uuid, str->sb_uuid, 16);
}
204
/**
 * gfs2_read_super - Read the gfs2 super block from disk
 * @sdp: The GFS2 super block
 * @sector: The location of the super block
 * @silent: Don't print a message if the check fails
 *
 * This uses the bio functions to read the super block from disk
 * because we want to be 100% sure that we never read cached data.
 * A super block is read twice only during each GFS2 mount and is
 * never written to by the filesystem. The first time its read no
 * locks are held, and the only details which are looked at are those
 * relating to the locking protocol. Once locking is up and working,
 * the sb is read again under the lock to establish the location of
 * the master directory (contains pointers to journals etc) and the
 * root directory.
 *
 * Returns: 0 on success or error
 */

static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
{
	struct super_block *sb = sdp->sd_vfs;
	struct gfs2_sb *p;
	struct page *page;
	struct bio *bio;

	page = alloc_page(GFP_NOFS);
	if (unlikely(!page))
		return -ENOBUFS;

	/* Lock the page ourselves; end_bio_io_page() unlocks it on completion,
	 * so wait_on_page_locked() below acts as the I/O barrier. */
	ClearPageUptodate(page);
	ClearPageDirty(page);
	lock_page(page);

	bio = bio_alloc(GFP_NOFS, 1);
	bio->bi_sector = sector * (sb->s_blocksize >> 9);
	bio->bi_bdev = sb->s_bdev;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	bio->bi_end_io = end_bio_io_page;
	bio->bi_private = page;
	submit_bio(READ_SYNC | REQ_META, bio);
	wait_on_page_locked(page);
	bio_put(bio);
	if (!PageUptodate(page)) {
		__free_page(page);
		return -EIO;
	}
	p = kmap(page);
	gfs2_sb_in(sdp, p);
	kunmap(page);
	__free_page(page);
	return gfs2_check_sb(sdp, silent);
}
6bac243f 259
9b8df98f
SW
/**
 * gfs2_read_sb - Read super block
 * @sdp: The GFS2 superblock
 * @silent: Don't print message if mount fails
 *
 * Reads the superblock and derives the filesystem geometry from it:
 * pointer counts per block, hash sizes, quota-change records per block,
 * the maximum directory-insert reservation, and the metadata tree
 * height tables (both regular and journaled).
 */

static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
{
	u32 hash_blocks, ind_blocks, leaf_blocks;
	u32 tmp_blocks;
	unsigned int x;
	int error;

	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
	if (error) {
		if (!silent)
			fs_err(sdp, "can't read superblock\n");
		return error;
	}

	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_dinode)) / sizeof(u64);
	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_meta_header)) / sizeof(u64);
	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header)) /
			       sizeof(struct gfs2_quota_change);
	sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize -
				     sizeof(struct gfs2_meta_header))
				    * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */

	/* Compute maximum reservation required to add a entry to a directory */

	hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
				   sdp->sd_jbsize);

	ind_blocks = 0;
	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
		tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
		ind_blocks += tmp_blocks;
	}

	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;

	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;

	/* Build the table of maximum file size per metadata tree height;
	 * the loop stops at the height whose capacity overflows u64
	 * (detected by the multiply/divide round-trip check below). */
	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_dinode);
	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		u64 space, d;
		u32 m;

		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		if (d != sdp->sd_heightsize[x - 1] || m)
			break;
		sdp->sd_heightsize[x] = space;
	}
	sdp->sd_max_height = x;
	sdp->sd_heightsize[x] = ~0;
	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);

	/* Same table, but for journaled (jdata) files which use sd_jbsize */
	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
				 sizeof(struct gfs2_dinode);
	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		u64 space, d;
		u32 m;

		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		if (d != sdp->sd_jheightsize[x - 1] || m)
			break;
		sdp->sd_jheightsize[x] = space;
	}
	sdp->sd_max_jheight = x;
	sdp->sd_jheightsize[x] = ~0;
	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);

	return 0;
}
354
b3b94faa
DT
/**
 * init_names - determine the lock protocol and lock table names
 * @sdp: the filesystem
 * @silent: don't complain if the superblock check fails
 *
 * Uses the mount arguments if given; otherwise autodetects both names
 * from the on-disk superblock, falling back to the device id for the
 * table name.  Any '/' in the table name is replaced with '_' because
 * the name is later used as a sysfs directory name.
 */
static int init_names(struct gfs2_sbd *sdp, int silent)
{
	char *proto, *table;
	int error = 0;

	proto = sdp->sd_args.ar_lockproto;
	table = sdp->sd_args.ar_locktable;

	/* Try to autodetect */

	if (!proto[0] || !table[0]) {
		error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
		if (error)
			return error;

		if (!proto[0])
			proto = sdp->sd_sb.sb_lockproto;
		if (!table[0])
			table = sdp->sd_sb.sb_locktable;
	}

	if (!table[0])
		table = sdp->sd_vfs->s_id;

	strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN);
	strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN);

	table = sdp->sd_table_name;
	while ((table = strchr(table, '/')))
		*table = '_';

	return error;
}
388
/**
 * init_locking - acquire the mount/live glocks and create rename/trans glocks
 * @sdp: the filesystem
 * @mount_gh: holder for the mount glock
 * @undo: if set, release everything in reverse order instead
 *
 * The error labels are ordered so that an undo (goto fail_trans) falls
 * through them all, releasing every resource acquired on the way in.
 */
static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
			int undo)
{
	int error = 0;

	if (undo)
		goto fail_trans;

	error = gfs2_glock_nq_num(sdp,
				  GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
				  LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
				  mount_gh);
	if (error) {
		fs_err(sdp, "can't acquire mount glock: %d\n", error);
		goto fail;
	}

	error = gfs2_glock_nq_num(sdp,
				  GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
				  LM_ST_SHARED,
				  LM_FLAG_NOEXP | GL_EXACT,
				  &sdp->sd_live_gh);
	if (error) {
		fs_err(sdp, "can't acquire live glock: %d\n", error);
		goto fail_mount;
	}

	error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
			       CREATE, &sdp->sd_rename_gl);
	if (error) {
		fs_err(sdp, "can't create rename glock: %d\n", error);
		goto fail_live;
	}

	error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
			       CREATE, &sdp->sd_trans_gl);
	if (error) {
		fs_err(sdp, "can't create transaction glock: %d\n", error);
		goto fail_rename;
	}

	return 0;

fail_trans:
	gfs2_glock_put(sdp->sd_trans_gl);
fail_rename:
	gfs2_glock_put(sdp->sd_rename_gl);
fail_live:
	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
fail_mount:
	gfs2_glock_dq_uninit(mount_gh);
fail:
	return error;
}
443
9b8df98f
SW
/**
 * gfs2_lookup_root - look up a root-level directory inode and make a dentry
 * @sb: the VFS super block
 * @dptr: where to store the resulting dentry
 * @no_addr: the disk address of the inode
 * @name: name used only in error messages ("root" or "master")
 *
 * Returns: 0 on success, negative errno otherwise
 */
static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
			    u64 no_addr, const char *name)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct dentry *dentry;
	struct inode *inode;

	inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
	if (IS_ERR(inode)) {
		fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
		return PTR_ERR(inode);
	}
	/* d_make_root() consumes the inode reference, even on failure */
	dentry = d_make_root(inode);
	if (!dentry) {
		fs_err(sdp, "can't alloc %s dentry\n", name);
		return -ENOMEM;
	}
	*dptr = dentry;
	return 0;
}
464
/**
 * init_sb - read the superblock under its glock and set up the sb for real
 * @sdp: the filesystem
 * @silent: don't complain if the FS isn't valid
 *
 * Validates the FS block size against the device and page size, sets the
 * VFS block size, and looks up the root and master directory inodes.
 * s_root points at the master dir for meta mounts, the root dir otherwise.
 */
static int init_sb(struct gfs2_sbd *sdp, int silent)
{
	struct super_block *sb = sdp->sd_vfs;
	struct gfs2_holder sb_gh;
	u64 no_addr;
	int ret;

	ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
				LM_ST_SHARED, 0, &sb_gh);
	if (ret) {
		fs_err(sdp, "can't acquire superblock glock: %d\n", ret);
		return ret;
	}

	ret = gfs2_read_sb(sdp, silent);
	if (ret) {
		fs_err(sdp, "can't read superblock: %d\n", ret);
		goto out;
	}

	/* Set up the buffer cache and SB for real */
	if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) {
		ret = -EINVAL;
		fs_err(sdp, "FS block size (%u) is too small for device "
		       "block size (%u)\n",
		       sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev));
		goto out;
	}
	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
		ret = -EINVAL;
		fs_err(sdp, "FS block size (%u) is too big for machine "
		       "page size (%u)\n",
		       sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
		goto out;
	}
	sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);

	/* Get the root inode */
	no_addr = sdp->sd_sb.sb_root_dir.no_addr;
	ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root");
	if (ret)
		goto out;

	/* Get the master inode */
	no_addr = sdp->sd_sb.sb_master_dir.no_addr;
	ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master");
	if (ret) {
		dput(sdp->sd_root_dir);
		goto out;
	}
	sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir);
out:
	gfs2_glock_dq_uninit(&sb_gh);
	return ret;
}
520
da6dd40d
BP
/**
 * map_journal_extents - create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because gfs2.mkfs
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential.  Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out.  But it's still possible.  These journals might have
 * several extents.
 *
 * TODO: This should be done in bigger chunks rather than one block at a time,
 *       but since it's only done at mount time, I'm not worried about the
 *       time it takes.
 */
static int map_journal_extents(struct gfs2_sbd *sdp)
{
	struct gfs2_jdesc *jd = sdp->sd_jdesc;
	unsigned int lb;
	u64 db, prev_db; /* logical block, disk block, prev disk block */
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_journal_extent *jext = NULL;
	struct buffer_head bh;
	int rc = 0;

	prev_db = 0;

	for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) {
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = 1 << ip->i_inode.i_blkbits;
		rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0);
		db = bh.b_blocknr;
		if (rc || !db) {
			printk(KERN_INFO "GFS2 journal mapping error %d: lb="
			       "%u db=%llu\n", rc, lb, (unsigned long long)db);
			break;
		}
		/* Start a new extent unless this block continues the last one */
		if (!prev_db || db != prev_db + 1) {
			jext = kzalloc(sizeof(struct gfs2_journal_extent),
				       GFP_KERNEL);
			if (!jext) {
				printk(KERN_INFO "GFS2 error: out of memory "
				       "mapping journal extents.\n");
				rc = -ENOMEM;
				break;
			}
			jext->dblock = db;
			jext->lblock = lb;
			jext->blocks = 1;
			list_add_tail(&jext->extent_list, &jd->extent_list);
		} else {
			jext->blocks++;
		}
		prev_db = db;
	}
	return rc;
}
581
/* Announce that the first mount (and its recovery pass) is complete:
 * notify the lock module and emit a sysfs uevent so other nodes may mount. */
static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
{
	char *message = "FIRSTMOUNT=Done";
	char *envp[] = { message, NULL };

	fs_info(sdp, "first mount done, others may mount\n");

	if (sdp->sd_lockstruct.ls_ops->lm_first_done)
		sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);

	kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
}
594
b5289681
SW
/**
 * gfs2_jindex_hold - Grab a lock on the jindex
 * @sdp: The GFS2 superblock
 * @ji_gh: the holder for the jindex glock
 *
 * Enumerates "journal0", "journal1", ... in the jindex directory and
 * builds the sd_jindex_list of journal descriptors.  On a normal exit
 * (all journals found) the jindex glock is left held via @ji_gh; on
 * error the glock is released.
 *
 * Returns: errno
 */

static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
{
	struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
	struct qstr name;
	char buf[20];
	struct gfs2_jdesc *jd;
	int error;

	name.name = buf;

	mutex_lock(&sdp->sd_jindex_mutex);

	for (;;) {
		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
		if (error)
			break;

		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
		name.hash = gfs2_disk_hash(name.name, name.len);

		error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
		if (error == -ENOENT) {
			/* No more journals: done, with ji_gh still held */
			error = 0;
			break;
		}

		/* Drop the glock while instantiating this journal's inode */
		gfs2_glock_dq_uninit(ji_gh);

		if (error)
			break;

		error = -ENOMEM;
		jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
		if (!jd)
			break;

		INIT_LIST_HEAD(&jd->extent_list);
		INIT_WORK(&jd->jd_work, gfs2_recover_func);
		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
			if (!jd->jd_inode)
				error = -ENOENT;
			else
				error = PTR_ERR(jd->jd_inode);
			kfree(jd);
			break;
		}

		spin_lock(&sdp->sd_jindex_spin);
		jd->jd_jid = sdp->sd_journals++;
		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
		spin_unlock(&sdp->sd_jindex_spin);
	}

	mutex_unlock(&sdp->sd_jindex_mutex);

	return error;
}
661
1d12d175
BP
/**
 * check_journal_clean - Make sure a journal is clean for a spectator mount
 * @sdp: The GFS2 superblock
 * @jd: The journal descriptor
 *
 * A spectator cannot replay a dirty journal, so the log header must carry
 * GFS2_LOG_HEAD_UNMOUNT (clean shutdown) for the mount to proceed.
 *
 * Returns: 0 if the journal is clean or locked, else an error
 */
static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	int error;
	struct gfs2_holder j_gh;
	struct gfs2_log_header_host head;
	struct gfs2_inode *ip;

	ip = GFS2_I(jd->jd_inode);
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
				   GL_EXACT | GL_NOCACHE, &j_gh);
	if (error) {
		fs_err(sdp, "Error locking journal for spectator mount.\n");
		return -EPERM;
	}
	error = gfs2_jdesc_check(jd);
	if (error) {
		fs_err(sdp, "Error checking journal for spectator mount.\n");
		goto out_unlock;
	}
	error = gfs2_find_jhead(jd, &head);
	if (error) {
		fs_err(sdp, "Error parsing journal for spectator mount.\n");
		goto out_unlock;
	}
	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		error = -EPERM;
		fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter "
		       "must not be a spectator.\n", jd->jd_jid);
	}

out_unlock:
	gfs2_glock_dq_uninit(&j_gh);
	return error;
}
703
b3b94faa
DT
704static int init_journal(struct gfs2_sbd *sdp, int undo)
705{
9b8df98f 706 struct inode *master = sdp->sd_master_dir->d_inode;
b3b94faa 707 struct gfs2_holder ji_gh;
5c676f6d 708 struct gfs2_inode *ip;
b3b94faa
DT
709 int jindex = 1;
710 int error = 0;
711
712 if (undo) {
713 jindex = 0;
fe64d517 714 goto fail_jinode_gh;
b3b94faa
DT
715 }
716
9b8df98f 717 sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
c752666c 718 if (IS_ERR(sdp->sd_jindex)) {
b3b94faa 719 fs_err(sdp, "can't lookup journal index: %d\n", error);
c752666c 720 return PTR_ERR(sdp->sd_jindex);
b3b94faa 721 }
b3b94faa
DT
722
723 /* Load in the journal index special file */
724
725 error = gfs2_jindex_hold(sdp, &ji_gh);
726 if (error) {
727 fs_err(sdp, "can't read journal index: %d\n", error);
728 goto fail;
729 }
730
0e5a9fb0 731 error = -EUSERS;
b3b94faa
DT
732 if (!gfs2_jindex_size(sdp)) {
733 fs_err(sdp, "no journals!\n");
907b9bce 734 goto fail_jindex;
b3b94faa
DT
735 }
736
737 if (sdp->sd_args.ar_spectator) {
738 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
fd041f0b 739 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
5e687eac
BM
740 atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
741 atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
b3b94faa
DT
742 } else {
743 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
744 fs_err(sdp, "can't mount journal #%u\n",
745 sdp->sd_lockstruct.ls_jid);
746 fs_err(sdp, "there are only %u journals (0 - %u)\n",
747 gfs2_jindex_size(sdp),
748 gfs2_jindex_size(sdp) - 1);
749 goto fail_jindex;
750 }
751 sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
752
feaa7bba 753 error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
b3b94faa
DT
754 &gfs2_journal_glops,
755 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
756 &sdp->sd_journal_gh);
757 if (error) {
758 fs_err(sdp, "can't acquire journal glock: %d\n", error);
759 goto fail_jindex;
760 }
761
feaa7bba
SW
762 ip = GFS2_I(sdp->sd_jdesc->jd_inode);
763 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
75be73a8 764 LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
b3b94faa
DT
765 &sdp->sd_jinode_gh);
766 if (error) {
767 fs_err(sdp, "can't acquire journal inode glock: %d\n",
768 error);
769 goto fail_journal_gh;
770 }
771
772 error = gfs2_jdesc_check(sdp->sd_jdesc);
773 if (error) {
774 fs_err(sdp, "my journal (%u) is bad: %d\n",
775 sdp->sd_jdesc->jd_jid, error);
776 goto fail_jinode_gh;
777 }
fd041f0b 778 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
5e687eac
BM
779 atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
780 atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
da6dd40d
BP
781
782 /* Map the extents for this journal's blocks */
783 map_journal_extents(sdp);
b3b94faa 784 }
63997775 785 trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
b3b94faa
DT
786
787 if (sdp->sd_lockstruct.ls_first) {
788 unsigned int x;
789 for (x = 0; x < sdp->sd_journals; x++) {
1d12d175
BP
790 struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x);
791
792 if (sdp->sd_args.ar_spectator) {
793 error = check_journal_clean(sdp, jd);
794 if (error)
795 goto fail_jinode_gh;
796 continue;
797 }
798 error = gfs2_recover_journal(jd, true);
b3b94faa
DT
799 if (error) {
800 fs_err(sdp, "error recovering journal %u: %d\n",
801 x, error);
802 goto fail_jinode_gh;
803 }
804 }
805
f057f6cd 806 gfs2_others_may_mount(sdp);
b3b94faa 807 } else if (!sdp->sd_args.ar_spectator) {
6ecd7c2d 808 error = gfs2_recover_journal(sdp->sd_jdesc, true);
b3b94faa
DT
809 if (error) {
810 fs_err(sdp, "error recovering my journal: %d\n", error);
811 goto fail_jinode_gh;
812 }
813 }
814
815 set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
816 gfs2_glock_dq_uninit(&ji_gh);
817 jindex = 0;
818
b3b94faa
DT
819 return 0;
820
a91ea69f 821fail_jinode_gh:
b3b94faa
DT
822 if (!sdp->sd_args.ar_spectator)
823 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
a91ea69f 824fail_journal_gh:
b3b94faa
DT
825 if (!sdp->sd_args.ar_spectator)
826 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
a91ea69f 827fail_jindex:
b3b94faa
DT
828 gfs2_jindex_free(sdp);
829 if (jindex)
830 gfs2_glock_dq_uninit(&ji_gh);
a91ea69f 831fail:
f42faf4f 832 iput(sdp->sd_jindex);
b3b94faa
DT
833 return error;
834}
835
/* Dedicated lockdep class for quota-file i_mutex (see comment below). */
static struct lock_class_key gfs2_quota_imutex_key;

/**
 * init_inodes - look up the system inodes (journal, statfs, rindex, quota)
 * @sdp: the filesystem
 * @undo: if set, release everything in reverse order instead
 *
 * The error labels unwind in reverse acquisition order; an undo call
 * enters at fail_qinode and falls through them all.
 */
static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
	int error = 0;
	struct inode *master = sdp->sd_master_dir->d_inode;

	if (undo)
		goto fail_qinode;

	error = init_journal(sdp, undo);
	if (error)
		goto fail;

	/* Read in the master statfs inode */
	sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
	if (IS_ERR(sdp->sd_statfs_inode)) {
		error = PTR_ERR(sdp->sd_statfs_inode);
		fs_err(sdp, "can't read in statfs inode: %d\n", error);
		goto fail_journal;
	}

	/* Read in the resource index inode */
	sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
	if (IS_ERR(sdp->sd_rindex)) {
		error = PTR_ERR(sdp->sd_rindex);
		fs_err(sdp, "can't get resource index inode: %d\n", error);
		goto fail_statfs;
	}
	sdp->sd_rindex_uptodate = 0;

	/* Read in the quota inode */
	sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota");
	if (IS_ERR(sdp->sd_quota_inode)) {
		error = PTR_ERR(sdp->sd_quota_inode);
		fs_err(sdp, "can't get quota file inode: %d\n", error);
		goto fail_rindex;
	}
	/*
	 * i_mutex on quota files is special. Since this inode is hidden system
	 * file, we are safe to define locking ourselves.
	 */
	lockdep_set_class(&sdp->sd_quota_inode->i_mutex,
			  &gfs2_quota_imutex_key);

	error = gfs2_rindex_update(sdp);
	if (error)
		goto fail_qinode;

	return 0;

fail_qinode:
	iput(sdp->sd_quota_inode);
fail_rindex:
	gfs2_clear_rgrpd(sdp);
	iput(sdp->sd_rindex);
fail_statfs:
	iput(sdp->sd_statfs_inode);
fail_journal:
	init_journal(sdp, UNDO);
fail:
	return error;
}
899
/**
 * init_per_node - look up this node's statfs_change / quota_change files
 * @sdp: the filesystem
 * @undo: if set, release everything in reverse order instead
 *
 * Spectator mounts have no per-node state, so this is a no-op for them.
 * The per-journal files live under the hidden "per_node" directory and
 * are locked exclusively by their owning node.
 */
static int init_per_node(struct gfs2_sbd *sdp, int undo)
{
	struct inode *pn = NULL;
	char buf[30];
	int error = 0;
	struct gfs2_inode *ip;
	struct inode *master = sdp->sd_master_dir->d_inode;

	if (sdp->sd_args.ar_spectator)
		return 0;

	if (undo)
		goto fail_qc_gh;

	pn = gfs2_lookup_simple(master, "per_node");
	if (IS_ERR(pn)) {
		error = PTR_ERR(pn);
		fs_err(sdp, "can't find per_node directory: %d\n", error);
		return error;
	}

	sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_sc_inode)) {
		error = PTR_ERR(sdp->sd_sc_inode);
		fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
		goto fail;
	}

	sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_qc_inode)) {
		error = PTR_ERR(sdp->sd_qc_inode);
		fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
		goto fail_ut_i;
	}

	/* Done with the per_node directory itself */
	iput(pn);
	pn = NULL;

	ip = GFS2_I(sdp->sd_sc_inode);
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
				   &sdp->sd_sc_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
		goto fail_qc_i;
	}

	ip = GFS2_I(sdp->sd_qc_inode);
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
				   &sdp->sd_qc_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
		goto fail_ut_gh;
	}

	return 0;

fail_qc_gh:
	gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
fail_ut_gh:
	gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
fail_qc_i:
	iput(sdp->sd_qc_inode);
fail_ut_i:
	iput(sdp->sd_sc_inode);
fail:
	if (pn)
		iput(pn);
	return error;
}
971
/**
 * init_threads - start (or, with @undo, stop) the logd and quotad kthreads
 * @sdp: the filesystem
 * @undo: if set, stop both daemons instead of starting them
 *
 * An undo call enters at fail_quotad and falls through, stopping quotad
 * first and then logd (reverse of start order).
 */
static int init_threads(struct gfs2_sbd *sdp, int undo)
{
	struct task_struct *p;
	int error = 0;

	if (undo)
		goto fail_quotad;

	p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
	if (IS_ERR(p)) {
		error = PTR_ERR(p);
		fs_err(sdp, "can't start logd thread: %d\n", error);
		return error;
	}
	sdp->sd_logd_process = p;

	p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
	if (IS_ERR(p)) {
		error = PTR_ERR(p);
		fs_err(sdp, "can't start quotad thread: %d\n", error);
		goto fail;
	}
	sdp->sd_quotad_process = p;

	return 0;


fail_quotad:
	kthread_stop(sdp->sd_quotad_process);
fail:
	kthread_stop(sdp->sd_logd_process);
	return error;
}
da755fdb 1005
f057f6cd
SW
/* Hostdata options recognised by lock_nolock: only "jid=" is meaningful. */
static const match_table_t nolock_tokens = {
	{ Opt_jid, "jid=%d\n", },
	{ Opt_err, NULL },
};
1010
/*
 * Lock operations for single-node ("lock_nolock") mounts. No lm_mount
 * callback, which gfs2_lm_mount/wait_on_journal use to detect that no
 * cluster join is required.
 */
static const struct lm_lockops nolock_ops = {
	.lm_proto_name = "lock_nolock",
	.lm_put_lock = gfs2_glock_free,
	.lm_tokens = &nolock_tokens,
};
1016
da755fdb
SW
/**
 * gfs2_lm_mount - mount a locking protocol
 * @sdp: the filesystem
 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
 *
 * Selects the lock module named by sb_lockproto, parses the hostdata
 * options (jid=, first=), and joins the cluster via lm_mount when the
 * protocol has one. Completes sd_locking_init on every exit path past
 * option parsing so waiters are never left hanging.
 *
 * Returns: errno
 */

static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
{
	const struct lm_lockops *lm;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	struct gfs2_args *args = &sdp->sd_args;
	const char *proto = sdp->sd_proto_name;
	const char *table = sdp->sd_table_name;
	char *o, *options;
	int ret;

	if (!strcmp("lock_nolock", proto)) {
		lm = &nolock_ops;
		/* No cluster locking, so flocks can be kept node-local */
		sdp->sd_args.ar_localflocks = 1;
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
	} else if (!strcmp("lock_dlm", proto)) {
		lm = &gfs2_dlm_ops;
#endif
	} else {
		printk(KERN_INFO "GFS2: can't find protocol %s\n", proto);
		return -ENOENT;
	}

	fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);

	ls->ls_ops = lm;
	ls->ls_first = 1;

	/* hostdata is a ":"-separated list of protocol-specific options */
	for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
		substring_t tmp[MAX_OPT_ARGS];
		int token, option;

		if (!o || !*o)
			continue;

		token = match_token(o, *lm->lm_tokens, tmp);
		switch (token) {
		case Opt_jid:
			ret = match_int(&tmp[0], &option);
			if (ret || option < 0)
				goto hostdata_error;
			/* A jid= mount option takes precedence; only accept
			   the hostdata jid if none was set yet */
			if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags))
				ls->ls_jid = option;
			break;
		case Opt_id:
		case Opt_nodir:
			/* Obsolete, but left for backward compat purposes */
			break;
		case Opt_first:
			ret = match_int(&tmp[0], &option);
			if (ret || (option != 0 && option != 1))
				goto hostdata_error;
			ls->ls_first = option;
			break;
		case Opt_err:
		default:
hostdata_error:
			fs_info(sdp, "unknown hostdata (%s)\n", o);
			return -EINVAL;
		}
	}

	if (lm->lm_mount == NULL) {
		/* lock_nolock: nothing to join, locking is ready now */
		fs_info(sdp, "Now mounting FS...\n");
		complete_all(&sdp->sd_locking_init);
		return 0;
	}
	ret = lm->lm_mount(sdp, table);
	if (ret == 0)
		fs_info(sdp, "Joined cluster. Now mounting FS...\n");
	complete_all(&sdp->sd_locking_init);
	return ret;
}
1098
1099void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1100{
f057f6cd
SW
1101 const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
1102 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
1103 lm->lm_unmount)
1104 lm->lm_unmount(sdp);
da755fdb 1105}
b3b94faa 1106
ba6e9364
SW
1107static int gfs2_journalid_wait(void *word)
1108{
1109 if (signal_pending(current))
1110 return -EINTR;
1111 schedule();
1112 return 0;
1113}
1114
/*
 * Wait for user space to assign us a journal id (clears SDF_NOJOURNALID).
 * lock_nolock has no lm_mount callback and needs no jid from user space,
 * so there is nothing to wait for in that case.
 */
static int wait_on_journal(struct gfs2_sbd *sdp)
{
	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
		return 0;

	return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
}
1122
8633ecfa
SW
1123void gfs2_online_uevent(struct gfs2_sbd *sdp)
1124{
1125 struct super_block *sb = sdp->sd_vfs;
1126 char ro[20];
1127 char spectator[20];
1128 char *envp[] = { ro, spectator, NULL };
1129 sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
1130 sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
1131 kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
1132}
1133
b3b94faa
DT
/**
 * fill_super - Read in superblock
 * @sb: The VFS superblock
 * @args: The already-parsed mount arguments (copied into sdp)
 * @silent: Don't complain if it's not a GFS2 filesystem
 *
 * Brings the filesystem up in stages: sbd allocation, sysfs/debugfs,
 * lock module mount, glock init, on-disk sb read, journal id, inodes,
 * per-node files, statfs, daemon threads, and finally rw recovery.
 * Each stage has a matching label in the unwind ladder at the bottom,
 * in strict reverse order.
 *
 * Returns: errno
 */

static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent)
{
	struct gfs2_sbd *sdp;
	struct gfs2_holder mount_gh;
	int error;

	sdp = init_sbd(sb);
	if (!sdp) {
		printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
		return -ENOMEM;
	}
	sdp->sd_args = *args;

	if (sdp->sd_args.ar_spectator) {
		/* Spectators never write, and recover read-only */
		sb->s_flags |= MS_RDONLY;
		set_bit(SDF_RORECOVERY, &sdp->sd_flags);
	}
	if (sdp->sd_args.ar_posix_acl)
		sb->s_flags |= MS_POSIXACL;
	if (sdp->sd_args.ar_nobarrier)
		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);

	sb->s_flags |= MS_NOSEC;
	sb->s_magic = GFS2_MAGIC;
	sb->s_op = &gfs2_super_ops;
	sb->s_d_op = &gfs2_dops;
	sb->s_export_op = &gfs2_export_ops;
	sb->s_xattr = gfs2_xattr_handlers;
	sb->s_qcop = &gfs2_quotactl_ops;
	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
	sb->s_time_gran = 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;

	/* Set up the buffer cache and fill in some fake block size values
	   to allow us to read-in the on-disk superblock. */
	sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
	sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;

	sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
	sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
	if (sdp->sd_args.ar_statfs_quantum) {
		sdp->sd_tune.gt_statfs_slow = 0;
		sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
	} else {
		/* statfs_quantum=0 selects the slow (fully accurate) path */
		sdp->sd_tune.gt_statfs_slow = 1;
		sdp->sd_tune.gt_statfs_quantum = 30;
	}

	error = init_names(sdp, silent);
	if (error) {
		/* In this case, we haven't initialized sysfs, so we have to
		   manually free the sdp. */
		free_percpu(sdp->sd_lkstats);
		kfree(sdp);
		sb->s_fs_info = NULL;
		return error;
	}

	/* Provisional fsname; re-done below once the jid is known */
	snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);

	error = gfs2_sys_fs_add(sdp);
	/*
	 * If we hit an error here, gfs2_sys_fs_add will have called function
	 * kobject_put which causes the sysfs usage count to go to zero, which
	 * causes sysfs to call function gfs2_sbd_release, which frees sdp.
	 * Subsequent error paths here will call gfs2_sys_fs_del, which also
	 * kobject_put to free sdp.
	 */
	if (error)
		return error;

	gfs2_create_debugfs_file(sdp);

	error = gfs2_lm_mount(sdp, silent);
	if (error)
		goto fail_debug;

	error = init_locking(sdp, &mount_gh, DO);
	if (error)
		goto fail_lm;

	error = init_sb(sdp, silent);
	if (error)
		goto fail_locking;

	error = wait_on_journal(sdp);
	if (error)
		goto fail_sb;

	/*
	 * If user space has failed to join the cluster or some similar
	 * failure has occurred, then the journal id will contain a
	 * negative (error) number. This will then be returned to the
	 * caller (of the mount syscall). We do this even for spectator
	 * mounts (which just write a jid of 0 to indicate "ok" even though
	 * the jid is unused in the spectator case)
	 */
	if (sdp->sd_lockstruct.ls_jid < 0) {
		error = sdp->sd_lockstruct.ls_jid;
		sdp->sd_lockstruct.ls_jid = 0;
		goto fail_sb;
	}

	/* Final fsname: "table.s" for spectators, "table.jid" otherwise */
	if (sdp->sd_args.ar_spectator)
		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
			 sdp->sd_table_name);
	else
		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
			 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);

	error = init_inodes(sdp, DO);
	if (error)
		goto fail_sb;

	error = init_per_node(sdp, DO);
	if (error)
		goto fail_inodes;

	error = gfs2_statfs_init(sdp);
	if (error) {
		fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
		goto fail_per_node;
	}

	error = init_threads(sdp, DO);
	if (error)
		goto fail_per_node;

	if (!(sb->s_flags & MS_RDONLY)) {
		error = gfs2_make_fs_rw(sdp);
		if (error) {
			fs_err(sdp, "can't make FS RW: %d\n", error);
			goto fail_threads;
		}
	}

	/* Mount complete: release the startup glock and announce online */
	gfs2_glock_dq_uninit(&mount_gh);
	gfs2_online_uevent(sdp);
	return 0;

fail_threads:
	init_threads(sdp, UNDO);
fail_per_node:
	init_per_node(sdp, UNDO);
fail_inodes:
	init_inodes(sdp, UNDO);
fail_sb:
	if (sdp->sd_root_dir)
		dput(sdp->sd_root_dir);
	if (sdp->sd_master_dir)
		dput(sdp->sd_master_dir);
	if (sb->s_root)
		dput(sb->s_root);
	sb->s_root = NULL;
fail_locking:
	init_locking(sdp, &mount_gh, UNDO);
fail_lm:
	gfs2_gl_hash_clear(sdp);
	gfs2_lm_unmount(sdp);
fail_debug:
	gfs2_delete_debugfs_file(sdp);
	free_percpu(sdp->sd_lkstats);
	/* gfs2_sys_fs_del must be the last thing we do, since it causes
	 * sysfs to call function gfs2_sbd_release, which frees sdp. */
	gfs2_sys_fs_del(sdp);
	sb->s_fs_info = NULL;
	return error;
}
1314
/*
 * sget() "set" callback for a fresh gfs2 superblock: bind it to the
 * block device passed as @data and seed the bdi from the bdev's queue.
 */
static int set_gfs2_super(struct super_block *s, void *data)
{
	s->s_bdev = data;
	s->s_dev = s->s_bdev->bd_dev;

	/*
	 * We set the bdi here to the queue backing, file systems can
	 * overwrite this in ->fill_super()
	 */
	s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
	return 0;
}
1327
f55073ff 1328static int test_gfs2_super(struct super_block *s, void *ptr)
f6d03139
SW
1329{
1330 struct block_device *bdev = ptr;
1331 return (bdev == s->s_bdev);
1332}
1333
/**
 * gfs2_mount - Get the GFS2 superblock
 * @fs_type: The GFS2 filesystem type
 * @flags: Mount flags
 * @dev_name: The name of the device
 * @data: The mount arguments
 *
 * Q. Why not use get_sb_bdev() ?
 * A. We need to select one of two root directories to mount, independent
 *    of whether this is the initial, or subsequent, mount of this sb
 *
 * Returns: the root (or meta-root) dentry on success, ERR_PTR on failure
 */

static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data)
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error;
	struct gfs2_args args;
	struct gfs2_sbd *sdp;

	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;

	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		error = -EBUSY;
		goto error_bdev;
	}
	s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	error = PTR_ERR(s);
	if (IS_ERR(s))
		goto error_bdev;

	if (s->s_root) {
		/*
		 * s_umount nests inside bd_mutex during
		 * __invalidate_device(). blkdev_put() acquires
		 * bd_mutex and can't be called under s_umount. Drop
		 * s_umount temporarily. This is safe as we're
		 * holding an active reference.
		 */
		up_write(&s->s_umount);
		blkdev_put(bdev, mode);
		down_write(&s->s_umount);
	}

	/* Defaults, then overridden by whatever the user passed in data */
	memset(&args, 0, sizeof(args));
	args.ar_quota = GFS2_QUOTA_DEFAULT;
	args.ar_data = GFS2_DATA_DEFAULT;
	args.ar_commit = 30;
	args.ar_statfs_quantum = 30;
	args.ar_quota_quantum = 60;
	args.ar_errors = GFS2_ERRORS_DEFAULT;

	error = gfs2_mount_args(&args, data);
	if (error) {
		printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
		goto error_super;
	}

	if (s->s_root) {
		/* Re-mount of an existing sb: only ro/rw must agree */
		error = -EBUSY;
		if ((flags ^ s->s_flags) & MS_RDONLY)
			goto error_super;
	} else {
		char b[BDEVNAME_SIZE];

		s->s_mode = mode;
		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
		sb_set_blocksize(s, block_size(bdev));
		error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
		if (error)
			goto error_super;
		s->s_flags |= MS_ACTIVE;
		bdev->bd_super = s;
	}

	sdp = s->s_fs_info;
	/* "meta" mounts expose the hidden metafs root instead */
	if (args.ar_meta)
		return dget(sdp->sd_master_dir);
	else
		return dget(sdp->sd_root_dir);

error_super:
	deactivate_locked_super(s);
	return ERR_PTR(error);
error_bdev:
	blkdev_put(bdev, mode);
	return ERR_PTR(error);
}
1439
f6d03139
SW
/*
 * sget() "set" callback for gfs2meta: a meta mount must attach to an
 * already-mounted gfs2 sb, so creating a new one is always an error.
 */
static int set_meta_super(struct super_block *s, void *ptr)
{
	return -EINVAL;
}
1444
8bcbbf00
AV
/*
 * gfs2_mount_meta - mount the gfs2meta view of an existing gfs2 mount
 * @fs_type: the gfs2meta filesystem type
 * @flags: mount flags (ro/rw must match the underlying gfs2 mount)
 * @dev_name: path within (or device of) the existing gfs2 mount
 * @data: mount arguments (unused here)
 *
 * Resolves @dev_name to a block device, finds the gfs2 superblock
 * already mounted on it (set_meta_super guarantees no new sb is
 * created), and returns its metafs root dentry.
 *
 * Returns: the master-dir dentry or ERR_PTR on failure
 */
static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
			int flags, const char *dev_name, void *data)
{
	struct super_block *s;
	struct gfs2_sbd *sdp;
	struct path path;
	int error;

	error = kern_path(dev_name, LOOKUP_FOLLOW, &path);
	if (error) {
		printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
		       dev_name, error);
		return ERR_PTR(error);
	}
	s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
		 path.dentry->d_inode->i_sb->s_bdev);
	path_put(&path);
	if (IS_ERR(s)) {
		printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
		return ERR_CAST(s);
	}
	if ((flags ^ s->s_flags) & MS_RDONLY) {
		deactivate_locked_super(s);
		return ERR_PTR(-EBUSY);
	}
	sdp = s->s_fs_info;
	return dget(sdp->sd_master_dir);
}
1473
/*
 * gfs2_kill_sb - unmount callback
 * @sb: the superblock being torn down
 *
 * Syncs metadata and drops the extra root/master dentry references taken
 * at mount time before handing off to kill_block_super. sdp may be NULL
 * when fill_super failed early (init_names path frees it and clears
 * s_fs_info).
 */
static void gfs2_kill_sb(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;

	if (sdp == NULL) {
		kill_block_super(sb);
		return;
	}

	gfs2_meta_syncfs(sdp);
	dput(sdp->sd_root_dir);
	dput(sdp->sd_master_dir);
	sdp->sd_root_dir = NULL;
	sdp->sd_master_dir = NULL;
	/* Drop cached dentries now that our extra refs are gone */
	shrink_dcache_sb(sb);
	gfs2_delete_debugfs_file(sdp);
	free_percpu(sdp->sd_lkstats);
	kill_block_super(sb);
}
1493
b3b94faa
DT
/* The primary gfs2 filesystem type, registered at module init. */
struct file_system_type gfs2_fs_type = {
	.name = "gfs2",
	.fs_flags = FS_REQUIRES_DEV,
	.mount = gfs2_mount,
	.kill_sb = gfs2_kill_sb,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_FS("gfs2");
419c93e0
SW
1502
/*
 * The "gfs2meta" type exposes the metadata root of an existing gfs2
 * mount; it piggybacks on that mount's sb, hence no kill_sb of its own.
 */
struct file_system_type gfs2meta_fs_type = {
	.name = "gfs2meta",
	.fs_flags = FS_REQUIRES_DEV,
	.mount = gfs2_mount_meta,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_FS("gfs2meta");