/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 */


#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_ctldir.h>
#include <sys/zpl.h>

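/*
 * Allocate a new inode for this superblock. The allocation itself is
 * delegated to zfs_inode_alloc(), which returns the inode embedded in
 * a freshly constructed znode; the VERIFY documents that failure here
 * is not expected or handled.
 */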
static struct inode *
zpl_inode_alloc(struct super_block *sb)
{
	struct inode *ip;

	VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
	ip->i_version = 1;

	return (ip);
}

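/*
 * Free an inode previously allocated by zpl_inode_alloc(). The VFS
 * guarantees no references remain, which the ASSERT on i_count
 * double-checks before handing off to zfs_inode_destroy().
 */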
static void
zpl_inode_destroy(struct inode *ip)
{
	ASSERT(atomic_read(&ip->i_count) == 0);
	zfs_inode_destroy(ip);
}

/*
 * When ->drop_inode() is called its return value indicates if the
 * inode should be evicted from the inode cache. If the inode is
 * unhashed and has no links the default policy is to evict it
 * immediately.
 *
 * Prior to 2.6.36 this eviction was accomplished by the vfs calling
 * ->delete_inode(). It was ->delete_inode()'s responsibility to
 * truncate the inode pages and call clear_inode(). The call to
 * clear_inode() synchronously invalidates all the buffers and
 * calls ->clear_inode(). It was ->clear_inode()'s responsibility
 * to cleanup any filesystem specific data before freeing the inode.
 *
 * This elaborate mechanism was replaced by ->evict_inode() which
 * does the job of both ->delete_inode() and ->clear_inode(). It
 * will be called exactly once, and when it returns the inode must
 * be in a state where it can simply be freed.
 *
 * The ->evict_inode() callback must minimally truncate the inode pages,
 * and call clear_inode(). For 2.6.35 and later kernels this will
 * simply update the inode state, with the sync occurring before the
 * truncate in evict(). For earlier kernels clear_inode() maps to
 * end_writeback() which is responsible for completing all outstanding
 * write back. In either case, once this is done it is safe to cleanup
 * any remaining inode specific data via zfs_inactive().
 */
#ifdef HAVE_EVICT_INODE
static void
zpl_evict_inode(struct inode *ip)
{
	truncate_setsize(ip, 0);
	clear_inode(ip);
	zfs_inactive(ip);
}

#else

static void
zpl_clear_inode(struct inode *ip)
{
	zfs_inactive(ip);
}

static void
zpl_inode_delete(struct inode *ip)
{
	truncate_setsize(ip, 0);
	clear_inode(ip);
}

#endif /* HAVE_EVICT_INODE */

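/*
 * Called by the VFS when the superblock is released at unmount.
 * zfs_umount() returns a positive errno on failure, which is negated
 * to the Linux convention and asserted to be non-positive.
 */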
static void
zpl_put_super(struct super_block *sb)
{
	int error;

	error = -zfs_umount(sb);
	ASSERT3S(error, <=, 0);
}

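/*
 * Write back dirty filesystem data. A credential reference is held
 * across the call because zfs_sync() requires one; 'wait' indicates
 * whether the sync must complete before returning.
 */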
static int
zpl_sync_fs(struct super_block *sb, int wait)
{
	cred_t *cr = CRED();
	int error;

	crhold(cr);
	error = -zfs_sync(sb, wait, cr);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

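/*
 * Report filesystem statistics (block and inode counts, name length,
 * etc.) by filling in the kstatfs structure via zfs_statvfs().
 */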
static int
zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
{
	int error;

	error = -zfs_statvfs(dentry, statp);
	ASSERT3S(error, <=, 0);

	return (error);
}

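/*
 * Re-apply mount options on a 'mount -o remount' without unmounting;
 * option parsing and validation are handled by zfs_remount().
 */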
static int
zpl_remount_fs(struct super_block *sb, int *flags, char *data)
{
	int error;

	error = -zfs_remount(sb, flags, data);
	ASSERT3S(error, <=, 0);

	return (error);
}

static void
zpl_umount_begin(struct super_block *sb)
{
	zfs_sb_t *zsb = sb->s_fs_info;
	int count;

	/*
	 * Best effort to unmount snapshots in .zfs/snapshot/. Normally this
	 * isn't required because snapshots have the MNT_SHRINKABLE flag set.
	 */
	if (zsb->z_ctldir)
		(void) zfsctl_unmount_snapshots(zsb, MNT_FORCE, &count);
}

/*
 * The Linux VFS automatically handles the following flags:
 * MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, MNT_NOATIME, MNT_READONLY
 */
#ifdef HAVE_SHOW_OPTIONS_WITH_DENTRY
static int
zpl_show_options(struct seq_file *seq, struct dentry *root)
{
	zfs_sb_t *zsb = root->d_sb->s_fs_info;

	seq_printf(seq, ",%s", zsb->z_flags & ZSB_XATTR ? "xattr" : "noxattr");

	return (0);
}
#else
static int
zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp)
{
	zfs_sb_t *zsb = vfsp->mnt_sb->s_fs_info;

	seq_printf(seq, ",%s", zsb->z_flags & ZSB_XATTR ? "xattr" : "noxattr");

	return (0);
}
#endif /* HAVE_SHOW_OPTIONS_WITH_DENTRY */

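/*
 * Populate a newly allocated superblock. The data passed in is the
 * zpl_mount_data_t packed by the mount entry point below, which
 * zfs_domount() uses to locate the dataset being mounted.
 */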
static int
zpl_fill_super(struct super_block *sb, void *data, int silent)
{
	int error;

	error = -zfs_domount(sb, data, silent);
	ASSERT3S(error, <=, 0);

	return (error);
}

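/*
 * Mount entry point. The dataset name and mount options are bundled
 * in a zpl_mount_data_t so both reach zpl_fill_super() through the
 * single data pointer the VFS provides. Newer kernels use the
 * mount_nodev() interface, older ones get_sb_nodev().
 */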
#ifdef HAVE_MOUNT_NODEV
static struct dentry *
zpl_mount(struct file_system_type *fs_type, int flags,
    const char *osname, void *data)
{
	zpl_mount_data_t zmd = { osname, data };

	return mount_nodev(fs_type, flags, &zmd, zpl_fill_super);
}
#else
static int
zpl_get_sb(struct file_system_type *fs_type, int flags,
    const char *osname, void *data, struct vfsmount *mnt)
{
	zpl_mount_data_t zmd = { osname, data };

	return get_sb_nodev(fs_type, flags, &zmd, zpl_fill_super, mnt);
}
#endif /* HAVE_MOUNT_NODEV */

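/*
 * Tear down the superblock. zfs_preumount() performs any zfs specific
 * cleanup (such as the .zfs control directory) before the generic
 * kill_anon_super() releases the superblock itself.
 */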
static void
zpl_kill_sb(struct super_block *sb)
{
	zfs_preumount(sb);
	kill_anon_super(sb);
}

#ifdef HAVE_SHRINK
/*
 * Linux 3.1 - 3.x API
 *
 * The Linux 3.1 API introduced per-sb cache shrinkers to replace the
 * global ones. This gives us a mechanism to cleanly target a specific
 * zfs file system when the dnode and inode caches grow too large.
 *
 * In addition, the 3.0 kernel added the iterate_supers_type() helper
 * function which is used to safely walk all of the zfs file systems.
 */
static void
zpl_prune_sb(struct super_block *sb, void *arg)
{
	int objects = 0;
	int error;

	error = -zfs_sb_prune(sb, *(unsigned long *)arg, &objects);
	ASSERT3S(error, <=, 0);
}

void
zpl_prune_sbs(int64_t bytes_to_scan, void *private)
{
	unsigned long nr_to_scan = (bytes_to_scan / sizeof(znode_t));

	iterate_supers_type(&zpl_fs_type, zpl_prune_sb, &nr_to_scan);
	kmem_reap();
}
#else
/*
 * Linux 2.6.x - 3.0 API
 *
 * These best effort interfaces are provided by the SPL to induce
 * the Linux VM subsystem to reclaim a fraction of both the dnode and
 * inode caches. Ideally, we would target just the zfs file systems;
 * however, our only option is to reclaim from them all.
 */
void
zpl_prune_sbs(int64_t bytes_to_scan, void *private)
{
	unsigned long nr_to_scan = (bytes_to_scan / sizeof(znode_t));

	shrink_dcache_memory(nr_to_scan, GFP_KERNEL);
	shrink_icache_memory(nr_to_scan, GFP_KERNEL);
	kmem_reap();
}
#endif /* HAVE_SHRINK */

#ifdef HAVE_NR_CACHED_OBJECTS
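/*
 * Report the number of cached znodes on this superblock so the VFS
 * can scale its per-sb shrinker passes; z_znodes_lock protects the
 * z_nr_znodes count.
 */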
static int
zpl_nr_cached_objects(struct super_block *sb)
{
	zfs_sb_t *zsb = sb->s_fs_info;
	int nr;

	mutex_enter(&zsb->z_znodes_lock);
	nr = zsb->z_nr_znodes;
	mutex_exit(&zsb->z_znodes_lock);

	return (nr);
}
#endif /* HAVE_NR_CACHED_OBJECTS */

#ifdef HAVE_FREE_CACHED_OBJECTS
/*
 * Attempt to evict some meta data from the cache. The ARC operates in
 * terms of bytes while the Linux VFS uses objects. Because this is
 * just a best effort eviction and the exact values aren't critical,
 * we extrapolate from an object count to a byte size using the
 * znode_t size.
 */
static void
zpl_free_cached_objects(struct super_block *sb, int nr_to_scan)
{
	arc_adjust_meta(nr_to_scan * sizeof(znode_t), B_FALSE);
}
#endif /* HAVE_FREE_CACHED_OBJECTS */

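/*
 * Superblock operations table registered with the VFS. Callbacks
 * explicitly set to NULL fall back to the kernel's generic handling.
 */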
const struct super_operations zpl_super_operations = {
	.alloc_inode = zpl_inode_alloc,
	.destroy_inode = zpl_inode_destroy,
	.dirty_inode = NULL,
	.write_inode = NULL,
	.drop_inode = NULL,
#ifdef HAVE_EVICT_INODE
	.evict_inode = zpl_evict_inode,
#else
	.clear_inode = zpl_clear_inode,
	.delete_inode = zpl_inode_delete,
#endif /* HAVE_EVICT_INODE */
	.put_super = zpl_put_super,
	.write_super = NULL,
	.sync_fs = zpl_sync_fs,
	.statfs = zpl_statfs,
	.remount_fs = zpl_remount_fs,
	.umount_begin = zpl_umount_begin,
	.show_options = zpl_show_options,
	.show_stats = NULL,
#ifdef HAVE_NR_CACHED_OBJECTS
	.nr_cached_objects = zpl_nr_cached_objects,
#endif /* HAVE_NR_CACHED_OBJECTS */
#ifdef HAVE_FREE_CACHED_OBJECTS
	.free_cached_objects = zpl_free_cached_objects,
#endif /* HAVE_FREE_CACHED_OBJECTS */
};

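/*
 * Filesystem type registered with the kernel when the module loads.
 * Which mount entry point is used depends on the kernel version, as
 * determined at configure time by HAVE_MOUNT_NODEV.
 */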
struct file_system_type zpl_fs_type = {
	.owner = THIS_MODULE,
	.name = ZFS_DRIVER,
#ifdef HAVE_MOUNT_NODEV
	.mount = zpl_mount,
#else
	.get_sb = zpl_get_sb,
#endif /* HAVE_MOUNT_NODEV */
	.kill_sb = zpl_kill_sb,
};