[mirror_ubuntu-artful-kernel.git] / fs / ocfs2 / dlm / dlmfs.c

/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmfs.c
 *
 * Code which implements the kernel side of a minimal userspace
 * interface to our DLM. This file handles the virtual file system
 * used for communication with userspace. Credit should go to ramfs,
 * which was a template for the fs side of this module.
 *
 * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/* Simple VFS hooks based on: */
/*
 * Resizable simple ram filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *               2000 Transmeta Corp.
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>

#include <asm/uaccess.h>


#include "cluster/nodemanager.h"
#include "cluster/heartbeat.h"
#include "cluster/tcp.h"

#include "dlmapi.h"

#include "userdlm.h"

#include "dlmfsver.h"

#define MLOG_MASK_PREFIX ML_DLMFS
#include "cluster/masklog.h"

static struct super_operations dlmfs_ops;
static struct file_operations dlmfs_file_operations;
static struct inode_operations dlmfs_dir_inode_operations;
static struct inode_operations dlmfs_root_inode_operations;
static struct inode_operations dlmfs_file_inode_operations;
static kmem_cache_t *dlmfs_inode_cache;

struct workqueue_struct *user_dlm_worker;

/*
 * Translate the flags given to open(2) into a DLM lock level and a
 * set of DLM request flags.
 *
 * Flags which mean something to us:
 *   O_WRONLY or O_RDWR  -> LKM_EXMODE level
 *   neither (O_RDONLY)  -> LKM_PRMODE level
 *   O_NONBLOCK          -> LKM_NOQUEUE request flag
 *
 * Every flag combination is currently accepted, so this always
 * returns 0.
 */
static int dlmfs_decode_open_flags(int open_flags,
				   int *level,
				   int *flags)
{
	int wants_write = open_flags & (O_WRONLY|O_RDWR);
	int lkm_flags = 0;

	if (open_flags & O_NONBLOCK)
		lkm_flags |= LKM_NOQUEUE;

	*level = wants_write ? LKM_EXMODE : LKM_PRMODE;
	*flags = lkm_flags;

	return 0;
}

static int dlmfs_file_open(struct inode *inode,
			   struct file *file)
{
	int status, level, flags;
	struct dlmfs_filp_private *fp = NULL;
	struct dlmfs_inode_private *ip;

	if (S_ISDIR(inode->i_mode))
		BUG();

	mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
		file->f_flags);

	status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
	if (status < 0)
		goto bail;

	/* We don't want to honor O_APPEND at read/write time as it
	 * doesn't make sense for LVB writes. */
	file->f_flags &= ~O_APPEND;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp) {
		status = -ENOMEM;
		goto bail;
	}
	fp->fp_lock_level = level;

	ip = DLMFS_I(inode);

	status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags);
	if (status < 0) {
		/* this is a strange error to return here but I want
		 * to be able userspace to be able to distinguish a
		 * valid lock request from one that simply couldn't be
		 * granted. */
		if (flags & LKM_NOQUEUE && status == -EAGAIN)
			status = -ETXTBSY;
		kfree(fp);
		goto bail;
	}

	file->private_data = fp;
bail:
	return status;
}

/*
 * release() on a lock file: drop the cluster lock that
 * dlmfs_file_open() took for this open file and free the per-open
 * state hanging off file->private_data.
 *
 * Always returns 0.
 */
static int dlmfs_file_release(struct inode *inode,
			      struct file *file)
{
	struct dlmfs_inode_private *ip = DLMFS_I(inode);
	struct dlmfs_filp_private *fp = file->private_data;

	/* only regular (lock) files get these file operations */
	BUG_ON(S_ISDIR(inode->i_mode));

	mlog(0, "close called on inode %lu\n", inode->i_ino);

	/* nothing recorded for this open, so nothing to undo */
	if (!fp)
		return 0;

	/* LKM_IVMODE means no lock level is held for this open */
	if (fp->fp_lock_level != LKM_IVMODE)
		user_dlm_cluster_unlock(&ip->ip_lockres, fp->fp_lock_level);

	kfree(fp);
	file->private_data = NULL;

	return 0;
}

/*
 * read() on a lock file: hand lock value block (LVB) bytes to
 * userspace.
 *
 * The transfer is clamped so that *ppos plus the length never exceeds
 * the inode size (dlmfs_get_inode() sets that to DLM_LVB_LEN).
 *
 * Returns the number of bytes copied, advancing *ppos by the same
 * amount; 0 when *ppos is already at or past the end of the LVB or
 * when count is 0; -EFAULT if the user buffer fails access_ok();
 * -ENOMEM if the bounce buffer cannot be allocated.
 */
static ssize_t dlmfs_file_read(struct file *filp,
			       char __user *buf,
			       size_t count,
			       loff_t *ppos)
{
	int bytes_left;
	ssize_t readlen;
	loff_t lvb_remaining;
	char *lvb_buf;
	struct inode *inode = filp->f_dentry->d_inode;

	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
		inode->i_ino, count, *ppos);

	if (*ppos >= i_size_read(inode))
		return 0;

	if (!count)
		return 0;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	/*
	 * Don't read past the lvb.  *ppos is below the inode size at
	 * this point, so lvb_remaining is positive.  A request that
	 * fits is satisfied in full: the length is count, not
	 * count - *ppos, which came up short (or went negative and
	 * made kmalloc() fail) for any non-zero offset.
	 */
	lvb_remaining = i_size_read(inode) - *ppos;
	if (count > lvb_remaining)
		readlen = lvb_remaining;
	else
		readlen = count;

	lvb_buf = kmalloc(readlen, GFP_KERNEL);
	if (!lvb_buf)
		return -ENOMEM;

	/*
	 * NOTE(review): user_dlm_read_lvb() is not told about *ppos,
	 * so presumably the bytes always come from the start of the
	 * LVB even for a non-zero offset -- confirm against userdlm.c.
	 */
	user_dlm_read_lvb(inode, lvb_buf, readlen);
	/* access_ok() was checked above, hence the __ variant */
	bytes_left = __copy_to_user(buf, lvb_buf, readlen);
	readlen -= bytes_left;

	kfree(lvb_buf);

	*ppos = *ppos + readlen;

	mlog(0, "read %zd bytes\n", readlen);
	return readlen;
}

/*
 * write() on a lock file: store bytes from userspace into the lock
 * value block (LVB).
 *
 * The transfer is clamped so that *ppos plus the length never exceeds
 * the inode size (dlmfs_get_inode() sets that to DLM_LVB_LEN).
 *
 * Returns the number of bytes accepted, advancing *ppos by the same
 * amount; -ENOSPC when *ppos is already at or past the end of the
 * LVB; 0 when count is 0; -EFAULT if the user buffer fails
 * access_ok(); -ENOMEM if the bounce buffer cannot be allocated.
 */
static ssize_t dlmfs_file_write(struct file *filp,
				const char __user *buf,
				size_t count,
				loff_t *ppos)
{
	int bytes_left;
	ssize_t writelen;
	loff_t lvb_remaining;
	char *lvb_buf;
	struct inode *inode = filp->f_dentry->d_inode;

	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
		inode->i_ino, count, *ppos);

	if (*ppos >= i_size_read(inode))
		return -ENOSPC;

	if (!count)
		return 0;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	/*
	 * Don't write past the lvb.  *ppos is below the inode size at
	 * this point, so lvb_remaining is positive.  A request that
	 * fits is accepted in full: the length is count, not
	 * count - *ppos, which came up short (or went negative and
	 * made kmalloc() fail) for any non-zero offset.
	 */
	lvb_remaining = i_size_read(inode) - *ppos;
	if (count > lvb_remaining)
		writelen = lvb_remaining;
	else
		writelen = count;

	lvb_buf = kmalloc(writelen, GFP_KERNEL);
	if (!lvb_buf)
		return -ENOMEM;

	bytes_left = copy_from_user(lvb_buf, buf, writelen);
	writelen -= bytes_left;
	/*
	 * NOTE(review): user_dlm_write_lvb() is not told about *ppos,
	 * so presumably the bytes always land at the start of the LVB
	 * even for a non-zero offset -- confirm against userdlm.c.
	 */
	if (writelen)
		user_dlm_write_lvb(inode, lvb_buf, writelen);

	kfree(lvb_buf);

	*ppos = *ppos + writelen;
	mlog(0, "wrote %zd bytes\n", writelen);
	return writelen;
}

/*
 * Constructor handed to kmem_cache_create() for dlmfs_inode_cache.
 * On a real construction pass (SLAB_CTOR_CONSTRUCTOR set and
 * SLAB_CTOR_VERIFY clear) it clears the dlmfs private pointers and
 * runs inode_init_once() on the embedded VFS inode; on any other
 * pass it does nothing.
 */
static void dlmfs_init_once(void *foo,
			    kmem_cache_t *cachep,
			    unsigned long flags)
{
	struct dlmfs_inode_private *ip = foo;
	unsigned long ctor_bits =
		flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR);

	if (ctor_bits != SLAB_CTOR_CONSTRUCTOR)
		return;

	ip->ip_dlm = NULL;
	ip->ip_parent = NULL;

	inode_init_once(&ip->ip_vfs_inode);
}

/*
 * super_operations.alloc_inode: allocate a dlmfs_inode_private from
 * dlmfs_inode_cache and return the VFS inode embedded in it, or NULL
 * if the allocation fails.  The sb argument is not used.
 */
static struct inode *dlmfs_alloc_inode(struct super_block *sb)
{
	struct dlmfs_inode_private *ip =
		kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS);

	return ip ? &ip->ip_vfs_inode : NULL;
}

static void dlmfs_destroy_inode(struct inode *inode)
{
	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
}

/*
 * super_operations.clear_inode: release what the inode holds.
 *
 * Regular (lock) files destroy their lock resource and drop the
 * reference on the parent directory that dlmfs_get_inode() took.
 * Anything else is treated as a directory and unregisters its dlm
 * context if it has one.  Either way the private pointers are reset
 * to NULL afterwards.
 */
static void dlmfs_clear_inode(struct inode *inode)
{
	struct dlmfs_inode_private *ip;
	int err;

	if (!inode)
		return;

	mlog(0, "inode %lu\n", inode->i_ino);

	ip = DLMFS_I(inode);

	if (S_ISREG(inode->i_mode)) {
		/* a failed destroy is logged but not otherwise acted on */
		err = user_dlm_destroy_lock(&ip->ip_lockres);
		if (err < 0)
			mlog_errno(err);
		iput(ip->ip_parent);
	} else {
		mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm);
		/* we must be a directory. If required, let's unregister
		 * the dlm context now. */
		if (ip->ip_dlm)
			user_dlm_unregister_context(ip->ip_dlm);
	}

	ip->ip_parent = NULL;
	ip->ip_dlm = NULL;
}

/*
 * Backing device info installed on the mapping of every dlmfs inode
 * (see dlmfs_get_root_inode() and dlmfs_get_inode()): readahead is
 * disabled and the capability bits request no dirty accounting and no
 * writeback.
 */
static struct backing_dev_info dlmfs_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

static struct inode *dlmfs_get_root_inode(struct super_block *sb)
{
	struct inode *inode = new_inode(sb);
	int mode = S_IFDIR | 0755;
	struct dlmfs_inode_private *ip;

	if (inode) {
		ip = DLMFS_I(inode);

		inode->i_mode = mode;
		inode->i_uid = current->fsuid;
		inode->i_gid = current->fsgid;
		inode->i_blksize = PAGE_CACHE_SIZE;
		inode->i_blocks = 0;
		inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		inode->i_nlink++;

		inode->i_fop = &simple_dir_operations;
		inode->i_op = &dlmfs_root_inode_operations;
	}

	return inode;
}

/*
 * Allocate and set up a new inode below @parent.
 *
 * @mode must describe either a regular file or a directory; any other
 * file type hits BUG().  The new inode inherits the parent's dlm
 * context pointer.  Regular files additionally get their size set to
 * DLM_LVB_LEN, an initialized lock resource, and a reference on the
 * parent that is dropped again in dlmfs_clear_inode().
 *
 * Returns the new inode, or NULL if new_inode() fails.
 */
static struct inode *dlmfs_get_inode(struct inode *parent,
				     struct dentry *dentry,
				     int mode)
{
	struct inode *inode = new_inode(parent->i_sb);
	struct dlmfs_inode_private *ip;

	if (!inode)
		return NULL;

	inode->i_mode = mode;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_blocks = 0;
	inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	ip = DLMFS_I(inode);
	ip->ip_dlm = DLMFS_I(parent)->ip_dlm;

	if (S_ISREG(mode)) {
		inode->i_op = &dlmfs_file_inode_operations;
		inode->i_fop = &dlmfs_file_operations;

		i_size_write(inode,  DLM_LVB_LEN);

		user_dlm_lock_res_init(&ip->ip_lockres, dentry);

		/* Pin the parent until clear_inode time.  This ensures
		 * that the dlm reference on each lock is dropped
		 * *before* the unregister code runs for the parent
		 * directory. */
		ip->ip_parent = igrab(parent);
		BUG_ON(!ip->ip_parent);
	} else if (S_ISDIR(mode)) {
		inode->i_op = &dlmfs_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* directory inodes start off with i_nlink ==
		 * 2 (for "." entry) */
		inode->i_nlink++;
	} else {
		/* for now we don't support anything other than
		 * directories and regular files. */
		BUG();
	}

	/* setgid parent: inherit its group, and directories also
	 * inherit the setgid bit itself */
	if (parent->i_mode & S_ISGID) {
		inode->i_gid = parent->i_gid;
		if (S_ISDIR(mode))
			inode->i_mode |= S_ISGID;
	}

	return inode;
}

/*
 * File creation. Allocate an inode, and we're done..
 */
/* SMP-safe */
static int dlmfs_mkdir(struct inode * dir,
		       struct dentry * dentry,
		       int mode)
{
	int status;
	struct inode *inode = NULL;
	struct qstr *domain = &dentry->d_name;
	struct dlmfs_inode_private *ip;
	struct dlm_ctxt *dlm;

	mlog(0, "mkdir %.*s\n", domain->len, domain->name);

	/* verify that we have a proper domain */
	if (domain->len >= O2NM_MAX_NAME_LEN) {
		status = -EINVAL;
		mlog(ML_ERROR, "invalid domain name for directory.\n");
		goto bail;
	}

	inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR);
	if (!inode) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	ip = DLMFS_I(inode);

	dlm = user_dlm_register_context(domain);
	if (IS_ERR(dlm)) {
		status = PTR_ERR(dlm);
		mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
		     status, domain->len, domain->name);
		goto bail;
	}
	ip->ip_dlm = dlm;

	dir->i_nlink++;
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */

	status = 0;
bail:
	if (status < 0)
		iput(inode);
	return status;
}

/*
 * Create a lock file inside a domain directory.  The file name is
 * the lock name.
 *
 * Returns 0 on success, -EINVAL if the name is
 * USER_DLM_LOCK_ID_MAX_LEN characters or longer or starts with '$',
 * or -ENOMEM if no inode can be allocated.  The nd argument is not
 * used.
 */
static int dlmfs_create(struct inode *dir,
			struct dentry *dentry,
			int mode,
			struct nameidata *nd)
{
	struct qstr *name = &dentry->d_name;
	struct inode *inode;
	int status;

	mlog(0, "create %.*s\n", name->len, name->name);

	/* verify name is valid and doesn't contain any dlm reserved
	 * characters */
	if (name->len >= USER_DLM_LOCK_ID_MAX_LEN ||
	    name->name[0] == '$') {
		mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len,
		     name->name);
		return -EINVAL;
	}

	inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG);
	if (!inode) {
		status = -ENOMEM;
		mlog_errno(status);
		return status;
	}

	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */

	return 0;
}

static int dlmfs_unlink(struct inode *dir,
			struct dentry *dentry)
{
	int status;
	struct inode *inode = dentry->d_inode;

	mlog(0, "unlink inode %lu\n", inode->i_ino);

	/* if there are no current holders, or none that are waiting
	 * to acquire a lock, this basically destroys our lockres. */
	status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
	if (status < 0) {
		mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n",
		     dentry->d_name.len, dentry->d_name.name, status);
		goto bail;
	}
	status = simple_unlink(dir, dentry);
bail:
	return status;
}

/*
 * Superblock setup callback passed to get_sb_nodev(): fill in the
 * superblock parameters, then create the root inode and its dentry.
 *
 * Returns 0 on success or -ENOMEM if either the root inode or the
 * root dentry cannot be allocated.  The data and silent arguments
 * are not used.
 */
static int dlmfs_fill_super(struct super_block * sb,
			    void * data,
			    int silent)
{
	struct inode *root_inode;
	struct dentry *root_dentry;

	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = DLMFS_MAGIC;
	sb->s_op = &dlmfs_ops;

	root_inode = dlmfs_get_root_inode(sb);
	if (!root_inode)
		return -ENOMEM;

	root_dentry = d_alloc_root(root_inode);
	if (!root_dentry) {
		/* no dentry took ownership of the inode; drop it */
		iput(root_inode);
		return -ENOMEM;
	}

	sb->s_root = root_dentry;
	return 0;
}

/*
 * File operations for lock files; dlmfs_get_inode() installs these on
 * regular-file inodes.  open/release take and drop the cluster lock,
 * read/write transfer the lock value block.
 */
static struct file_operations dlmfs_file_operations = {
	.open		= dlmfs_file_open,
	.release	= dlmfs_file_release,
	.read		= dlmfs_file_read,
	.write		= dlmfs_file_write,
};

/*
 * Inode operations for the directories created by dlmfs_mkdir():
 * lock files can be created, looked up and unlinked in them.  There
 * is deliberately no .mkdir here.
 */
static struct inode_operations dlmfs_dir_inode_operations = {
	.create		= dlmfs_create,
	.lookup		= simple_lookup,
	.unlink		= dlmfs_unlink,
};

/*
 * Inode operations for the root directory only.  Keeping .mkdir in
 * this table and out of dlmfs_dir_inode_operations restricts mkdir to
 * the toplevel of the fs; the root in turn has no .create, so lock
 * files cannot be made directly in it.
 */
static struct inode_operations dlmfs_root_inode_operations = {
	.lookup		= simple_lookup,
	.mkdir		= dlmfs_mkdir,
	.rmdir		= simple_rmdir,
};

/*
 * Superblock operations, installed by dlmfs_fill_super().  Inodes are
 * allocated from and returned to dlmfs_inode_cache, and
 * dlmfs_clear_inode() releases the lock resource or dlm context an
 * inode holds.
 */
static struct super_operations dlmfs_ops = {
	.statfs		= simple_statfs,
	.alloc_inode	= dlmfs_alloc_inode,
	.destroy_inode	= dlmfs_destroy_inode,
	.clear_inode	= dlmfs_clear_inode,
	.drop_inode	= generic_delete_inode,
};

/*
 * Inode operations for lock files; dlmfs_get_inode() installs these
 * on regular-file inodes.  Only getattr is provided.
 */
static struct inode_operations dlmfs_file_inode_operations = {
	.getattr	= simple_getattr,
};

/*
 * file_system_type.get_sb: obtain a superblock through
 * get_sb_nodev(), with dlmfs_fill_super() as the setup callback.
 * dev_name is not used.
 */
static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super);
}

/*
 * Filesystem type registered by init_dlmfs_fs() and unregistered by
 * exit_dlmfs_fs(); "ocfs2_dlmfs" is the name it is registered under.
 */
static struct file_system_type dlmfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ocfs2_dlmfs",
	.get_sb		= dlmfs_get_sb,
	.kill_sb	= kill_litter_super,
};

/*
 * Module init: create the inode slab cache and the single-threaded
 * "user_dlm" workqueue, then register the filesystem type.
 *
 * Returns 0 on success, -ENOMEM if the cache or the workqueue cannot
 * be created, or the non-zero status from register_filesystem().
 * Whatever was set up before a failure is torn down again.
 */
static int __init init_dlmfs_fs(void)
{
	int status;

	dlmfs_print_version();

	dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
				sizeof(struct dlmfs_inode_private),
				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
					SLAB_MEM_SPREAD),
				dlmfs_init_once, NULL);
	if (!dlmfs_inode_cache)
		return -ENOMEM;

	user_dlm_worker = create_singlethread_workqueue("user_dlm");
	if (!user_dlm_worker) {
		/* only the cache exists so far */
		kmem_cache_destroy(dlmfs_inode_cache);
		return -ENOMEM;
	}

	status = register_filesystem(&dlmfs_fs_type);
	if (status) {
		kmem_cache_destroy(dlmfs_inode_cache);
		destroy_workqueue(user_dlm_worker);
		return status;
	}

	printk("OCFS2 User DLM kernel interface loaded\n");
	return status;
}

/*
 * Module exit: unregister the filesystem type, flush and destroy the
 * "user_dlm" workqueue, then destroy the inode slab cache.  A
 * non-zero result from kmem_cache_destroy() is reported with a
 * KERN_INFO message.
 */
static void __exit exit_dlmfs_fs(void)
{
	int cache_busy;

	unregister_filesystem(&dlmfs_fs_type);

	flush_workqueue(user_dlm_worker);
	destroy_workqueue(user_dlm_worker);

	cache_busy = kmem_cache_destroy(dlmfs_inode_cache);
	if (cache_busy)
		printk(KERN_INFO "dlmfs_inode_cache: not all structures "
		       "were freed\n");
}

MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");

module_init(init_dlmfs_fs)
module_exit(exit_dlmfs_fs)
Commit	Line	Data
8df08c89 MF	1	/* -- mode: c; c-basic-offset: 8; --
	2	* vim: noexpandtab sw=8 ts=8 sts=0:
	3	*
	4	* dlmfs.c
	5	*
	6	* Code which implements the kernel side of a minimal userspace
	7	* interface to our DLM. This file handles the virtual file system
	8	* used for communication with userspace. Credit should go to ramfs,
	9	* which was a template for the fs side of this module.
	10	*
	11	* Copyright (C) 2003, 2004 Oracle. All rights reserved.
	12	*
	13	* This program is free software; you can redistribute it and/or
	14	* modify it under the terms of the GNU General Public
	15	* License as published by the Free Software Foundation; either
	16	* version 2 of the License, or (at your option) any later version.
	17	*
	18	* This program is distributed in the hope that it will be useful,
	19	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	20	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	21	* General Public License for more details.
	22	*
	23	* You should have received a copy of the GNU General Public
	24	* License along with this program; if not, write to the
	25	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	26	* Boston, MA 021110-1307, USA.
	27	*/
	28
	29	/* Simple VFS hooks based on: */
	30	/*
	31	* Resizable simple ram filesystem for Linux.
	32	*
	33	* Copyright (C) 2000 Linus Torvalds.
	34	* 2000 Transmeta Corp.
	35	*/
	36
	37	#include <linux/module.h>
	38	#include <linux/fs.h>
	39	#include <linux/pagemap.h>
	40	#include <linux/types.h>
	41	#include <linux/slab.h>
	42	#include <linux/highmem.h>
	43	#include <linux/init.h>
	44	#include <linux/string.h>
	45	#include <linux/smp_lock.h>
	46	#include <linux/backing-dev.h>
	47
	48	#include <asm/uaccess.h>
	49
	50
	51	#include "cluster/nodemanager.h"
	52	#include "cluster/heartbeat.h"
	53	#include "cluster/tcp.h"
	54
	55	#include "dlmapi.h"
	56
	57	#include "userdlm.h"
	58
	59	#include "dlmfsver.h"
	60
	61	#define MLOG_MASK_PREFIX ML_DLMFS
	62	#include "cluster/masklog.h"
	63
	64	static struct super_operations dlmfs_ops;
65	static struct file_operations dlmfs_file_operations;
66	static struct inode_operations dlmfs_dir_inode_operations;
67	static struct inode_operations dlmfs_root_inode_operations;
68	static struct inode_operations dlmfs_file_inode_operations;
69	static kmem_cache_t *dlmfs_inode_cache;
70
71	struct workqueue_struct *user_dlm_worker;
72
73	/*
74	* decodes a set of open flags into a valid lock level and a set of flags.
75	* returns < 0 if we have invalid flags
76	* flags which mean something to us:
77	* O_RDONLY -> PRMODE level
78	* O_WRONLY -> EXMODE level
79	*
80	* O_NONBLOCK -> LKM_NOQUEUE
81	*/
82	static int dlmfs_decode_open_flags(int open_flags,
83	int *level,
84	int *flags)
85	{
86	if (open_flags & (O_WRONLY\|O_RDWR))
87	*level = LKM_EXMODE;
88	else
89	*level = LKM_PRMODE;
90
91	*flags = 0;
92	if (open_flags & O_NONBLOCK)
93	*flags \|= LKM_NOQUEUE;
94
95	return 0;
96	}
97
98	static int dlmfs_file_open(struct inode *inode,
99	struct file *file)
100	{
101	int status, level, flags;
102	struct dlmfs_filp_private *fp = NULL;
103	struct dlmfs_inode_private *ip;
104
105	if (S_ISDIR(inode->i_mode))
106	BUG();
107
108	mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
109	file->f_flags);
110
111	status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
112	if (status < 0)
113	goto bail;
114
115	/* We don't want to honor O_APPEND at read/write time as it
116	* doesn't make sense for LVB writes. */
117	file->f_flags &= ~O_APPEND;
118
119	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
120	if (!fp) {
121	status = -ENOMEM;
122	goto bail;
123	}
124	fp->fp_lock_level = level;
125
126	ip = DLMFS_I(inode);
127
128	status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags);
129	if (status < 0) {
130	/* this is a strange error to return here but I want
131	* to be able userspace to be able to distinguish a
132	* valid lock request from one that simply couldn't be
133	* granted. */
134	if (flags & LKM_NOQUEUE && status == -EAGAIN)
135	status = -ETXTBSY;
136	kfree(fp);
137	goto bail;
138	}
139
140	file->private_data = fp;
141	bail:
142	return status;
143	}
144
145	static int dlmfs_file_release(struct inode *inode,
146	struct file *file)
147	{
148	int level, status;
149	struct dlmfs_inode_private *ip = DLMFS_I(inode);
150	struct dlmfs_filp_private *fp =
151	(struct dlmfs_filp_private *) file->private_data;
152
153	if (S_ISDIR(inode->i_mode))
154	BUG();
155
156	mlog(0, "close called on inode %lu\n", inode->i_ino);
157
158	status = 0;
159	if (fp) {
160	level = fp->fp_lock_level;
161	if (level != LKM_IVMODE)
162	user_dlm_cluster_unlock(&ip->ip_lockres, level);
163
164	kfree(fp);
165	file->private_data = NULL;
166	}
167
168	return 0;
169	}
170
171	static ssize_t dlmfs_file_read(struct file *filp,
172	char __user *buf,
173	size_t count,
174	loff_t *ppos)
175	{
176	int bytes_left;
177	ssize_t readlen;
178	char *lvb_buf;
179	struct inode *inode = filp->f_dentry->d_inode;
180
181	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
182	inode->i_ino, count, *ppos);
183
184	if (*ppos >= i_size_read(inode))
185	return 0;
186
187	if (!count)
188	return 0;
189
190	if (!access_ok(VERIFY_WRITE, buf, count))
191	return -EFAULT;
192
193	/* don't read past the lvb */
194	if ((count + *ppos) > i_size_read(inode))
195	readlen = i_size_read(inode) - *ppos;
196	else
197	readlen = count - *ppos;
198
199	lvb_buf = kmalloc(readlen, GFP_KERNEL);
200	if (!lvb_buf)
201	return -ENOMEM;
202
203	user_dlm_read_lvb(inode, lvb_buf, readlen);
204	bytes_left = __copy_to_user(buf, lvb_buf, readlen);
205	readlen -= bytes_left;
206
207	kfree(lvb_buf);
208
209	ppos = ppos + readlen;
210
211	mlog(0, "read %zd bytes\n", readlen);
212	return readlen;
213	}
214
215	static ssize_t dlmfs_file_write(struct file *filp,
216	const char __user *buf,
217	size_t count,
218	loff_t *ppos)
219	{
220	int bytes_left;
221	ssize_t writelen;
222	char *lvb_buf;
223	struct inode *inode = filp->f_dentry->d_inode;
224
225	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
226	inode->i_ino, count, *ppos);
227
228	if (*ppos >= i_size_read(inode))
229	return -ENOSPC;
230
231	if (!count)
232	return 0;
233
234	if (!access_ok(VERIFY_READ, buf, count))
235	return -EFAULT;
236
237	/* don't write past the lvb */
238	if ((count + *ppos) > i_size_read(inode))
239	writelen = i_size_read(inode) - *ppos;
240	else
241	writelen = count - *ppos;
242
243	lvb_buf = kmalloc(writelen, GFP_KERNEL);
244	if (!lvb_buf)
245	return -ENOMEM;
246
247	bytes_left = copy_from_user(lvb_buf, buf, writelen);
248	writelen -= bytes_left;
249	if (writelen)
250	user_dlm_write_lvb(inode, lvb_buf, writelen);
251
252	kfree(lvb_buf);
253
254	ppos = ppos + writelen;
255	mlog(0, "wrote %zd bytes\n", writelen);
256	return writelen;
257	}
258
259	static void dlmfs_init_once(void *foo,
260	kmem_cache_t *cachep,
261	unsigned long flags)
262	{
263	struct dlmfs_inode_private *ip =
264	(struct dlmfs_inode_private *) foo;
265
266	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
267	SLAB_CTOR_CONSTRUCTOR) {
268	ip->ip_dlm = NULL;
269	ip->ip_parent = NULL;
270
271	inode_init_once(&ip->ip_vfs_inode);
272	}
273	}
274
275	static struct inode dlmfs_alloc_inode(struct super_block sb)
276	{
277	struct dlmfs_inode_private *ip;
278
279	ip = kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS);
280	if (!ip)
281	return NULL;
282
283	return &ip->ip_vfs_inode;
284	}
285
286	static void dlmfs_destroy_inode(struct inode *inode)
287	{
288	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
289	}
290
291	static void dlmfs_clear_inode(struct inode *inode)
292	{
293	int status;
294	struct dlmfs_inode_private *ip;
295
296	if (!inode)
297	return;
298
299	mlog(0, "inode %lu\n", inode->i_ino);
300
301	ip = DLMFS_I(inode);
302
303	if (S_ISREG(inode->i_mode)) {
304	status = user_dlm_destroy_lock(&ip->ip_lockres);
305	if (status < 0)
306	mlog_errno(status);
307	iput(ip->ip_parent);
308	goto clear_fields;
309	}
310
311	mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm);
312	/* we must be a directory. If required, lets unregister the
313	* dlm context now. */
314	if (ip->ip_dlm)
315	user_dlm_unregister_context(ip->ip_dlm);
316	clear_fields:
317	ip->ip_parent = NULL;
318	ip->ip_dlm = NULL;
319	}
320
321	static struct backing_dev_info dlmfs_backing_dev_info = {
322	.ra_pages = 0, /* No readahead */
323	.capabilities = BDI_CAP_NO_ACCT_DIRTY \| BDI_CAP_NO_WRITEBACK,
324	};
325
326	static struct inode dlmfs_get_root_inode(struct super_block sb)
327	{
328	struct inode *inode = new_inode(sb);
329	int mode = S_IFDIR \| 0755;
330	struct dlmfs_inode_private *ip;
331
332	if (inode) {
333	ip = DLMFS_I(inode);
334
335	inode->i_mode = mode;
336	inode->i_uid = current->fsuid;
337	inode->i_gid = current->fsgid;
338	inode->i_blksize = PAGE_CACHE_SIZE;
339	inode->i_blocks = 0;
340	inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
341	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
342	inode->i_nlink++;
343
344	inode->i_fop = &simple_dir_operations;
345	inode->i_op = &dlmfs_root_inode_operations;
346	}
347
348	return inode;
349	}
350
351	static struct inode dlmfs_get_inode(struct inode parent,
352	struct dentry *dentry,
353	int mode)
354	{
355	struct super_block *sb = parent->i_sb;
356	struct inode * inode = new_inode(sb);
357	struct dlmfs_inode_private *ip;
358
359	if (!inode)
360	return NULL;
361
362	inode->i_mode = mode;
363	inode->i_uid = current->fsuid;
364	inode->i_gid = current->fsgid;
365	inode->i_blksize = PAGE_CACHE_SIZE;
366	inode->i_blocks = 0;
367	inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
368	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
369
370	ip = DLMFS_I(inode);
371	ip->ip_dlm = DLMFS_I(parent)->ip_dlm;
372
373	switch (mode & S_IFMT) {
374	default:
375	/* for now we don't support anything other than
376	* directories and regular files. */
377	BUG();
378	break;
379	case S_IFREG:
380	inode->i_op = &dlmfs_file_inode_operations;
381	inode->i_fop = &dlmfs_file_operations;
382
383	i_size_write(inode, DLM_LVB_LEN);
384
385	user_dlm_lock_res_init(&ip->ip_lockres, dentry);
386
387	/* released at clear_inode time, this insures that we
388	* get to drop the dlm reference on each lock before
389	* we call the unregister code for releasing parent
390	* directories. */
391	ip->ip_parent = igrab(parent);
392	BUG_ON(!ip->ip_parent);
393	break;
394	case S_IFDIR:
395	inode->i_op = &dlmfs_dir_inode_operations;
396	inode->i_fop = &simple_dir_operations;
397
398	/* directory inodes start off with i_nlink ==
399	* 2 (for "." entry) */
400	inode->i_nlink++;
401	break;
402	}
403
404	if (parent->i_mode & S_ISGID) {
405	inode->i_gid = parent->i_gid;
406	if (S_ISDIR(mode))
407	inode->i_mode \|= S_ISGID;
408	}
409
410	return inode;
411	}
412
413	/*
414	* File creation. Allocate an inode, and we're done..
415	*/
416	/* SMP-safe */
417	static int dlmfs_mkdir(struct inode * dir,
418	struct dentry * dentry,
419	int mode)
420	{
421	int status;
422	struct inode *inode = NULL;
423	struct qstr *domain = &dentry->d_name;
424	struct dlmfs_inode_private *ip;
425	struct dlm_ctxt *dlm;
426
427	mlog(0, "mkdir %.*s\n", domain->len, domain->name);
428
429	/* verify that we have a proper domain */
430	if (domain->len >= O2NM_MAX_NAME_LEN) {
431	status = -EINVAL;
432	mlog(ML_ERROR, "invalid domain name for directory.\n");
433	goto bail;
434	}
435
436	inode = dlmfs_get_inode(dir, dentry, mode \| S_IFDIR);
437	if (!inode) {
438	status = -ENOMEM;
439	mlog_errno(status);
440	goto bail;
441	}
442
443	ip = DLMFS_I(inode);
444
445	dlm = user_dlm_register_context(domain);
446	if (IS_ERR(dlm)) {
447	status = PTR_ERR(dlm);
448	mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
449	status, domain->len, domain->name);
450	goto bail;
451	}
452	ip->ip_dlm = dlm;
453
454	dir->i_nlink++;
455	d_instantiate(dentry, inode);
456	dget(dentry); /* Extra count - pin the dentry in core */
457
458	status = 0;
459	bail:
460	if (status < 0)
461	iput(inode);
462	return status;
463	}
464
465	static int dlmfs_create(struct inode *dir,
466	struct dentry *dentry,
467	int mode,
468	struct nameidata *nd)
469	{
470	int status = 0;
471	struct inode *inode;
472	struct qstr *name = &dentry->d_name;
473
474	mlog(0, "create %.*s\n", name->len, name->name);
475
476	/* verify name is valid and doesn't contain any dlm reserved
477	* characters */
478	if (name->len >= USER_DLM_LOCK_ID_MAX_LEN \|\|
479	name->name[0] == '$') {
480	status = -EINVAL;
481	mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len,
482	name->name);
483	goto bail;
484	}
485
486	inode = dlmfs_get_inode(dir, dentry, mode \| S_IFREG);
487	if (!inode) {
488	status = -ENOMEM;
489	mlog_errno(status);
490	goto bail;
491	}
492
493	d_instantiate(dentry, inode);
494	dget(dentry); /* Extra count - pin the dentry in core */
495	bail:
496	return status;
497	}
498
499	static int dlmfs_unlink(struct inode *dir,
500	struct dentry *dentry)
501	{
502	int status;
503	struct inode *inode = dentry->d_inode;
504
505	mlog(0, "unlink inode %lu\n", inode->i_ino);
506
507	/* if there are no current holders, or none that are waiting
508	* to acquire a lock, this basically destroys our lockres. */
509	status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
510	if (status < 0) {
511	mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n",
512	dentry->d_name.len, dentry->d_name.name, status);
513	goto bail;
514	}
515	status = simple_unlink(dir, dentry);
516	bail:
517	return status;
518	}
519
520	static int dlmfs_fill_super(struct super_block * sb,
521	void * data,
522	int silent)
523	{
524	struct inode * inode;
525	struct dentry * root;
526
527	sb->s_maxbytes = MAX_LFS_FILESIZE;
528	sb->s_blocksize = PAGE_CACHE_SIZE;
529	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
530	sb->s_magic = DLMFS_MAGIC;
531	sb->s_op = &dlmfs_ops;
532	inode = dlmfs_get_root_inode(sb);
533	if (!inode)
534	return -ENOMEM;
535
536	root = d_alloc_root(inode);
537	if (!root) {
538	iput(inode);
539	return -ENOMEM;
540	}
541	sb->s_root = root;
542	return 0;
543	}
544
545	static struct file_operations dlmfs_file_operations = {
546	.open = dlmfs_file_open,
547	.release = dlmfs_file_release,
548	.read = dlmfs_file_read,
549	.write = dlmfs_file_write,
550	};
551
552	static struct inode_operations dlmfs_dir_inode_operations = {
553	.create = dlmfs_create,
554	.lookup = simple_lookup,
555	.unlink = dlmfs_unlink,
556	};
557
558	/* this way we can restrict mkdir to only the toplevel of the fs. */
559	static struct inode_operations dlmfs_root_inode_operations = {
560	.lookup = simple_lookup,
561	.mkdir = dlmfs_mkdir,
562	.rmdir = simple_rmdir,
563	};
564
565	static struct super_operations dlmfs_ops = {
566	.statfs = simple_statfs,
567	.alloc_inode = dlmfs_alloc_inode,
568	.destroy_inode = dlmfs_destroy_inode,
569	.clear_inode = dlmfs_clear_inode,
570	.drop_inode = generic_delete_inode,
571	};
572
573	static struct inode_operations dlmfs_file_inode_operations = {
574	.getattr = simple_getattr,
575	};
576
577	static struct super_block dlmfs_get_sb(struct file_system_type fs_type,
578	int flags, const char dev_name, void data)
579	{
580	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super);
581	}
582
583	static struct file_system_type dlmfs_fs_type = {
584	.owner = THIS_MODULE,
585	.name = "ocfs2_dlmfs",
586	.get_sb = dlmfs_get_sb,
587	.kill_sb = kill_litter_super,
588	};
589
590	static int __init init_dlmfs_fs(void)
591	{
592	int status;
593	int cleanup_inode = 0, cleanup_worker = 0;
594
595	dlmfs_print_version();
596
597	dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
598	sizeof(struct dlmfs_inode_private),
fffb60f9 PJ	599	0, (SLAB_HWCACHE_ALIGN\|SLAB_RECLAIM_ACCOUNT\|
fffb60f9 PJ	600	SLAB_MEM_SPREAD),
8df08c89 MF	601	dlmfs_init_once, NULL);
	602	if (!dlmfs_inode_cache)
	603	return -ENOMEM;
	604	cleanup_inode = 1;
	605
	606	user_dlm_worker = create_singlethread_workqueue("user_dlm");
	607	if (!user_dlm_worker) {
	608	status = -ENOMEM;
	609	goto bail;
	610	}
	611	cleanup_worker = 1;
	612
	613	status = register_filesystem(&dlmfs_fs_type);
	614	bail:
	615	if (status) {
	616	if (cleanup_inode)
	617	kmem_cache_destroy(dlmfs_inode_cache);
	618	if (cleanup_worker)
	619	destroy_workqueue(user_dlm_worker);
	620	} else
	621	printk("OCFS2 User DLM kernel interface loaded\n");
	622	return status;
	623	}
	624
	625	static void __exit exit_dlmfs_fs(void)
	626	{
	627	unregister_filesystem(&dlmfs_fs_type);
	628
	629	flush_workqueue(user_dlm_worker);
	630	destroy_workqueue(user_dlm_worker);
	631
	632	if (kmem_cache_destroy(dlmfs_inode_cache))
	633	printk(KERN_INFO "dlmfs_inode_cache: not all structures "
	634	"were freed\n");
	635	}
	636
	637	MODULE_AUTHOR("Oracle");
	638	MODULE_LICENSE("GPL");
	639
	640	module_init(init_dlmfs_fs)
	641	module_exit(exit_dlmfs_fs)