4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * cl_device and cl_device_type implementation for VVP layer.
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
41 #define DEBUG_SUBSYSTEM S_LLITE
44 #include "../include/obd.h"
45 #include "../include/lustre_lite.h"
46 #include "llite_internal.h"
47 #include "vvp_internal.h"
49 /*****************************************************************************
51 * Vvp device and device type functions.
56 * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical
57 * "llite_" (var. "ll_") prefix.
/* Slab caches backing per-thread and per-session VVP context values. */
static struct kmem_cache *vvp_thread_kmem;
static struct kmem_cache *vvp_session_kmem;
62 static struct lu_kmem_descr vvp_caches
[] = {
64 .ckd_cache
= &vvp_thread_kmem
,
65 .ckd_name
= "vvp_thread_kmem",
66 .ckd_size
= sizeof(struct vvp_thread_info
),
69 .ckd_cache
= &vvp_session_kmem
,
70 .ckd_name
= "vvp_session_kmem",
71 .ckd_size
= sizeof(struct vvp_session
)
78 static void *vvp_key_init(const struct lu_context
*ctx
,
79 struct lu_context_key
*key
)
81 struct vvp_thread_info
*info
;
83 OBD_SLAB_ALLOC_PTR_GFP(info
, vvp_thread_kmem
, GFP_NOFS
);
85 info
= ERR_PTR(-ENOMEM
);
89 static void vvp_key_fini(const struct lu_context
*ctx
,
90 struct lu_context_key
*key
, void *data
)
92 struct vvp_thread_info
*info
= data
;
94 OBD_SLAB_FREE_PTR(info
, vvp_thread_kmem
);
97 static void *vvp_session_key_init(const struct lu_context
*ctx
,
98 struct lu_context_key
*key
)
100 struct vvp_session
*session
;
102 OBD_SLAB_ALLOC_PTR_GFP(session
, vvp_session_kmem
, GFP_NOFS
);
104 session
= ERR_PTR(-ENOMEM
);
108 static void vvp_session_key_fini(const struct lu_context
*ctx
,
109 struct lu_context_key
*key
, void *data
)
111 struct vvp_session
*session
= data
;
113 OBD_SLAB_FREE_PTR(session
, vvp_session_kmem
);
117 struct lu_context_key vvp_key
= {
118 .lct_tags
= LCT_CL_THREAD
,
119 .lct_init
= vvp_key_init
,
120 .lct_fini
= vvp_key_fini
123 struct lu_context_key vvp_session_key
= {
124 .lct_tags
= LCT_SESSION
,
125 .lct_init
= vvp_session_key_init
,
126 .lct_fini
= vvp_session_key_fini
129 /* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
130 LU_TYPE_INIT_FINI(vvp
, &ccc_key
, &ccc_session_key
, &vvp_key
, &vvp_session_key
);
132 static const struct lu_device_operations vvp_lu_ops
= {
133 .ldo_object_alloc
= vvp_object_alloc
136 static const struct cl_device_operations vvp_cl_ops
= {
137 .cdo_req_init
= ccc_req_init
140 static struct lu_device
*vvp_device_alloc(const struct lu_env
*env
,
141 struct lu_device_type
*t
,
142 struct lustre_cfg
*cfg
)
144 return ccc_device_alloc(env
, t
, cfg
, &vvp_lu_ops
, &vvp_cl_ops
);
147 static const struct lu_device_type_operations vvp_device_type_ops
= {
148 .ldto_init
= vvp_type_init
,
149 .ldto_fini
= vvp_type_fini
,
151 .ldto_start
= vvp_type_start
,
152 .ldto_stop
= vvp_type_stop
,
154 .ldto_device_alloc
= vvp_device_alloc
,
155 .ldto_device_free
= ccc_device_free
,
156 .ldto_device_init
= ccc_device_init
,
157 .ldto_device_fini
= ccc_device_fini
160 struct lu_device_type vvp_device_type
= {
161 .ldt_tags
= LU_DEVICE_CL
,
162 .ldt_name
= LUSTRE_VVP_NAME
,
163 .ldt_ops
= &vvp_device_type_ops
,
164 .ldt_ctx_tags
= LCT_CL_THREAD
168 * A mutex serializing calls to vvp_inode_fini() under extreme memory
169 * pressure, when environments cannot be allocated.
171 int vvp_global_init(void)
175 result
= lu_kmem_init(vvp_caches
);
177 result
= ccc_global_init(&vvp_device_type
);
179 lu_kmem_fini(vvp_caches
);
184 void vvp_global_fini(void)
186 ccc_global_fini(&vvp_device_type
);
187 lu_kmem_fini(vvp_caches
);
191 /*****************************************************************************
193 * mirror obd-devices into cl devices.
197 int cl_sb_init(struct super_block
*sb
)
199 struct ll_sb_info
*sbi
;
200 struct cl_device
*cl
;
206 env
= cl_env_get(&refcheck
);
208 cl
= cl_type_setup(env
, NULL
, &vvp_device_type
,
209 sbi
->ll_dt_exp
->exp_obd
->obd_lu_dev
);
211 cl2ccc_dev(cl
)->cdv_sb
= sb
;
213 sbi
->ll_site
= cl2lu_dev(cl
)->ld_site
;
215 cl_env_put(env
, &refcheck
);
221 int cl_sb_fini(struct super_block
*sb
)
223 struct ll_sb_info
*sbi
;
225 struct cl_device
*cld
;
230 env
= cl_env_get(&refcheck
);
235 cl_stack_fini(env
, cld
);
239 cl_env_put(env
, &refcheck
);
242 CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
243 result
= PTR_ERR(env
);
246 * If mount failed (sbi->ll_cl == NULL), and this there are no other
247 * mounts, stop device types manually (this usually happens
248 * automatically when last device is destroyed).
254 /****************************************************************************
256 * /proc/fs/lustre/llite/$MNT/dump_page_cache
258 ****************************************************************************/
261 * To represent contents of a page cache as a byte stream, following
262 * information is encoded in 64bit offset:
264 * - file hash bucket in lu_site::ls_hash[] 28bits
266 * - how far file is from bucket head 4bits
268 * - page index 32bits
270 * First two data identify a file in the cache uniquely.
273 #define PGC_OBJ_SHIFT (32 + 4)
274 #define PGC_DEPTH_SHIFT (32)
276 struct vvp_pgcache_id
{
282 struct lu_object_header
*vpi_obj
;
285 static void vvp_pgcache_id_unpack(loff_t pos
, struct vvp_pgcache_id
*id
)
287 CLASSERT(sizeof(pos
) == sizeof(__u64
));
289 id
->vpi_index
= pos
& 0xffffffff;
290 id
->vpi_depth
= (pos
>> PGC_DEPTH_SHIFT
) & 0xf;
291 id
->vpi_bucket
= (unsigned long long)pos
>> PGC_OBJ_SHIFT
;
294 static loff_t
vvp_pgcache_id_pack(struct vvp_pgcache_id
*id
)
297 ((__u64
)id
->vpi_index
) |
298 ((__u64
)id
->vpi_depth
<< PGC_DEPTH_SHIFT
) |
299 ((__u64
)id
->vpi_bucket
<< PGC_OBJ_SHIFT
);
302 static int vvp_pgcache_obj_get(struct cfs_hash
*hs
, struct cfs_hash_bd
*bd
,
303 struct hlist_node
*hnode
, void *data
)
305 struct vvp_pgcache_id
*id
= data
;
306 struct lu_object_header
*hdr
= cfs_hash_object(hs
, hnode
);
308 if (id
->vpi_curdep
-- > 0)
309 return 0; /* continue */
311 if (lu_object_is_dying(hdr
))
314 cfs_hash_get(hs
, hnode
);
319 static struct cl_object
*vvp_pgcache_obj(const struct lu_env
*env
,
320 struct lu_device
*dev
,
321 struct vvp_pgcache_id
*id
)
323 LASSERT(lu_device_is_cl(dev
));
325 id
->vpi_depth
&= 0xf;
327 id
->vpi_curdep
= id
->vpi_depth
;
329 cfs_hash_hlist_for_each(dev
->ld_site
->ls_obj_hash
, id
->vpi_bucket
,
330 vvp_pgcache_obj_get
, id
);
331 if (id
->vpi_obj
!= NULL
) {
332 struct lu_object
*lu_obj
;
334 lu_obj
= lu_object_locate(id
->vpi_obj
, dev
->ld_type
);
335 if (lu_obj
!= NULL
) {
336 lu_object_ref_add(lu_obj
, "dump", current
);
337 return lu2cl(lu_obj
);
339 lu_object_put(env
, lu_object_top(id
->vpi_obj
));
341 } else if (id
->vpi_curdep
> 0) {
347 static loff_t
vvp_pgcache_find(const struct lu_env
*env
,
348 struct lu_device
*dev
, loff_t pos
)
350 struct cl_object
*clob
;
351 struct lu_site
*site
;
352 struct vvp_pgcache_id id
;
355 vvp_pgcache_id_unpack(pos
, &id
);
358 if (id
.vpi_bucket
>= CFS_HASH_NHLIST(site
->ls_obj_hash
))
360 clob
= vvp_pgcache_obj(env
, dev
, &id
);
362 struct cl_object_header
*hdr
;
366 /* got an object. Find next page. */
367 hdr
= cl_object_header(clob
);
369 spin_lock(&hdr
->coh_page_guard
);
370 nr
= radix_tree_gang_lookup(&hdr
->coh_tree
,
374 id
.vpi_index
= pg
->cp_index
;
375 /* Cant support over 16T file */
376 nr
= !(pg
->cp_index
> 0xffffffff);
378 spin_unlock(&hdr
->coh_page_guard
);
380 lu_object_ref_del(&clob
->co_lu
, "dump", current
);
381 cl_object_put(env
, clob
);
383 return vvp_pgcache_id_pack(&id
);
385 /* to the next object. */
388 if (id
.vpi_depth
== 0 && ++id
.vpi_bucket
== 0)
/*
 * Print "|flag" (or just "flag" for the first one) when PG_##flag is set on
 * @page, and record in @has_flags that at least one flag was emitted.
 */
#define seq_page_flag(seq, page, flag, has_flags) do {		  \
	if (test_bit(PG_##flag, &(page)->flags)) {		  \
		seq_printf(seq, "%s"#flag, has_flags ? "|" : ""); \
		has_flags = 1;					  \
	}							  \
} while (0)
401 static void vvp_pgcache_page_show(const struct lu_env
*env
,
402 struct seq_file
*seq
, struct cl_page
*page
)
404 struct ccc_page
*cpg
;
408 cpg
= cl2ccc_page(cl_page_at(page
, &vvp_device_type
));
409 vmpage
= cpg
->cpg_page
;
410 seq_printf(seq
, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [",
414 cpg
->cpg_write_queued
? "wq" : "- ",
415 cpg
->cpg_defer_uptodate
? "du" : "- ",
416 PageWriteback(vmpage
) ? "wb" : "-",
417 vmpage
, vmpage
->mapping
->host
->i_ino
,
418 vmpage
->mapping
->host
->i_generation
,
419 vmpage
->mapping
->host
, vmpage
->index
,
422 seq_page_flag(seq
, vmpage
, locked
, has_flags
);
423 seq_page_flag(seq
, vmpage
, error
, has_flags
);
424 seq_page_flag(seq
, vmpage
, referenced
, has_flags
);
425 seq_page_flag(seq
, vmpage
, uptodate
, has_flags
);
426 seq_page_flag(seq
, vmpage
, dirty
, has_flags
);
427 seq_page_flag(seq
, vmpage
, writeback
, has_flags
);
428 seq_printf(seq
, "%s]\n", has_flags
? "" : "-");
431 static int vvp_pgcache_show(struct seq_file
*f
, void *v
)
434 struct ll_sb_info
*sbi
;
435 struct cl_object
*clob
;
437 struct cl_page
*page
;
438 struct cl_object_header
*hdr
;
439 struct vvp_pgcache_id id
;
443 env
= cl_env_get(&refcheck
);
446 vvp_pgcache_id_unpack(pos
, &id
);
448 clob
= vvp_pgcache_obj(env
, &sbi
->ll_cl
->cd_lu_dev
, &id
);
450 hdr
= cl_object_header(clob
);
452 spin_lock(&hdr
->coh_page_guard
);
453 page
= cl_page_lookup(hdr
, id
.vpi_index
);
454 spin_unlock(&hdr
->coh_page_guard
);
456 seq_printf(f
, "%8x@"DFID
": ",
457 id
.vpi_index
, PFID(&hdr
->coh_lu
.loh_fid
));
459 vvp_pgcache_page_show(env
, f
, page
);
460 cl_page_put(env
, page
);
462 seq_puts(f
, "missing\n");
463 lu_object_ref_del(&clob
->co_lu
, "dump", current
);
464 cl_object_put(env
, clob
);
466 seq_printf(f
, "%llx missing\n", pos
);
467 cl_env_put(env
, &refcheck
);
470 result
= PTR_ERR(env
);
474 static void *vvp_pgcache_start(struct seq_file
*f
, loff_t
*pos
)
476 struct ll_sb_info
*sbi
;
482 env
= cl_env_get(&refcheck
);
485 if (sbi
->ll_site
->ls_obj_hash
->hs_cur_bits
> 64 - PGC_OBJ_SHIFT
)
486 pos
= ERR_PTR(-EFBIG
);
488 *pos
= vvp_pgcache_find(env
, &sbi
->ll_cl
->cd_lu_dev
,
493 cl_env_put(env
, &refcheck
);
498 static void *vvp_pgcache_next(struct seq_file
*f
, void *v
, loff_t
*pos
)
500 struct ll_sb_info
*sbi
;
504 env
= cl_env_get(&refcheck
);
507 *pos
= vvp_pgcache_find(env
, &sbi
->ll_cl
->cd_lu_dev
, *pos
+ 1);
510 cl_env_put(env
, &refcheck
);
/* seq_operations::stop — no iteration state to release. */
static void vvp_pgcache_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
520 static struct seq_operations vvp_pgcache_ops
= {
521 .start
= vvp_pgcache_start
,
522 .next
= vvp_pgcache_next
,
523 .stop
= vvp_pgcache_stop
,
524 .show
= vvp_pgcache_show
527 static int vvp_dump_pgcache_seq_open(struct inode
*inode
, struct file
*filp
)
529 struct ll_sb_info
*sbi
= PDE_DATA(inode
);
530 struct seq_file
*seq
;
533 result
= seq_open(filp
, &vvp_pgcache_ops
);
535 seq
= filp
->private_data
;
541 const struct file_operations vvp_dump_pgcache_file_ops
= {
542 .owner
= THIS_MODULE
,
543 .open
= vvp_dump_pgcache_seq_open
,
546 .release
= seq_release
,