3 * Copyright (C) 2011 Novell Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
11 #include <linux/slab.h>
12 #include <linux/namei.h>
13 #include <linux/file.h>
14 #include <linux/xattr.h>
15 #include <linux/rbtree.h>
16 #include <linux/security.h>
17 #include <linux/cred.h>
18 #include "overlayfs.h"
20 struct ovl_cache_entry
{
24 struct list_head l_node
;
26 struct ovl_cache_entry
*next_maybe_whiteout
;
31 struct ovl_dir_cache
{
34 struct list_head entries
;
37 struct ovl_readdir_data
{
38 struct dir_context ctx
;
39 struct dentry
*dentry
;
42 struct list_head
*list
;
43 struct list_head middle
;
44 struct ovl_cache_entry
*first_maybe_whiteout
;
47 bool d_type_supported
;
53 struct ovl_dir_cache
*cache
;
54 struct list_head
*cursor
;
55 struct file
*realfile
;
56 struct file
*upperfile
;
59 static struct ovl_cache_entry
*ovl_cache_entry_from_node(struct rb_node
*n
)
61 return container_of(n
, struct ovl_cache_entry
, node
);
64 static struct ovl_cache_entry
*ovl_cache_entry_find(struct rb_root
*root
,
65 const char *name
, int len
)
67 struct rb_node
*node
= root
->rb_node
;
71 struct ovl_cache_entry
*p
= ovl_cache_entry_from_node(node
);
73 cmp
= strncmp(name
, p
->name
, len
);
75 node
= p
->node
.rb_right
;
76 else if (cmp
< 0 || len
< p
->len
)
77 node
= p
->node
.rb_left
;
85 static struct ovl_cache_entry
*ovl_cache_entry_new(struct ovl_readdir_data
*rdd
,
86 const char *name
, int len
,
87 u64 ino
, unsigned int d_type
)
89 struct ovl_cache_entry
*p
;
90 size_t size
= offsetof(struct ovl_cache_entry
, name
[len
+ 1]);
92 p
= kmalloc(size
, GFP_KERNEL
);
96 memcpy(p
->name
, name
, len
);
101 p
->is_whiteout
= false;
103 if (d_type
== DT_CHR
) {
104 p
->next_maybe_whiteout
= rdd
->first_maybe_whiteout
;
105 rdd
->first_maybe_whiteout
= p
;
110 static int ovl_cache_entry_add_rb(struct ovl_readdir_data
*rdd
,
111 const char *name
, int len
, u64 ino
,
114 struct rb_node
**newp
= &rdd
->root
.rb_node
;
115 struct rb_node
*parent
= NULL
;
116 struct ovl_cache_entry
*p
;
120 struct ovl_cache_entry
*tmp
;
123 tmp
= ovl_cache_entry_from_node(*newp
);
124 cmp
= strncmp(name
, tmp
->name
, len
);
126 newp
= &tmp
->node
.rb_right
;
127 else if (cmp
< 0 || len
< tmp
->len
)
128 newp
= &tmp
->node
.rb_left
;
133 p
= ovl_cache_entry_new(rdd
, name
, len
, ino
, d_type
);
137 list_add_tail(&p
->l_node
, rdd
->list
);
138 rb_link_node(&p
->node
, parent
, newp
);
139 rb_insert_color(&p
->node
, &rdd
->root
);
144 static int ovl_fill_lowest(struct ovl_readdir_data
*rdd
,
145 const char *name
, int namelen
,
146 loff_t offset
, u64 ino
, unsigned int d_type
)
148 struct ovl_cache_entry
*p
;
150 p
= ovl_cache_entry_find(&rdd
->root
, name
, namelen
);
152 list_move_tail(&p
->l_node
, &rdd
->middle
);
154 p
= ovl_cache_entry_new(rdd
, name
, namelen
, ino
, d_type
);
158 list_add_tail(&p
->l_node
, &rdd
->middle
);
164 void ovl_cache_free(struct list_head
*list
)
166 struct ovl_cache_entry
*p
;
167 struct ovl_cache_entry
*n
;
169 list_for_each_entry_safe(p
, n
, list
, l_node
)
172 INIT_LIST_HEAD(list
);
175 static void ovl_cache_put(struct ovl_dir_file
*od
, struct dentry
*dentry
)
177 struct ovl_dir_cache
*cache
= od
->cache
;
179 WARN_ON(cache
->refcount
<= 0);
181 if (!cache
->refcount
) {
182 if (ovl_dir_cache(dentry
) == cache
)
183 ovl_set_dir_cache(dentry
, NULL
);
185 ovl_cache_free(&cache
->entries
);
190 static int ovl_fill_merge(struct dir_context
*ctx
, const char *name
,
191 int namelen
, loff_t offset
, u64 ino
,
194 struct ovl_readdir_data
*rdd
=
195 container_of(ctx
, struct ovl_readdir_data
, ctx
);
199 return ovl_cache_entry_add_rb(rdd
, name
, namelen
, ino
, d_type
);
201 return ovl_fill_lowest(rdd
, name
, namelen
, offset
, ino
, d_type
);
204 static int ovl_check_whiteouts(struct dentry
*dir
, struct ovl_readdir_data
*rdd
)
207 struct ovl_cache_entry
*p
;
208 struct dentry
*dentry
;
209 const struct cred
*old_cred
;
211 old_cred
= ovl_override_creds(rdd
->dentry
->d_sb
);
213 err
= down_write_killable(&dir
->d_inode
->i_rwsem
);
215 while (rdd
->first_maybe_whiteout
) {
216 p
= rdd
->first_maybe_whiteout
;
217 rdd
->first_maybe_whiteout
= p
->next_maybe_whiteout
;
218 dentry
= lookup_one_len(p
->name
, dir
, p
->len
);
219 if (!IS_ERR(dentry
)) {
220 p
->is_whiteout
= ovl_is_whiteout(dentry
);
224 inode_unlock(dir
->d_inode
);
226 revert_creds(old_cred
);
231 static inline int ovl_dir_read(struct path
*realpath
,
232 struct ovl_readdir_data
*rdd
)
234 struct file
*realfile
;
237 realfile
= ovl_path_open(realpath
, O_RDONLY
| O_DIRECTORY
);
238 if (IS_ERR(realfile
))
239 return PTR_ERR(realfile
);
241 rdd
->first_maybe_whiteout
= NULL
;
246 err
= iterate_dir(realfile
, &rdd
->ctx
);
249 } while (!err
&& rdd
->count
);
251 if (!err
&& rdd
->first_maybe_whiteout
&& rdd
->dentry
)
252 err
= ovl_check_whiteouts(realpath
->dentry
, rdd
);
259 static void ovl_dir_reset(struct file
*file
)
261 struct ovl_dir_file
*od
= file
->private_data
;
262 struct ovl_dir_cache
*cache
= od
->cache
;
263 struct dentry
*dentry
= file
->f_path
.dentry
;
264 enum ovl_path_type type
= ovl_path_type(dentry
);
266 if (cache
&& ovl_dentry_version_get(dentry
) != cache
->version
) {
267 ovl_cache_put(od
, dentry
);
271 WARN_ON(!od
->is_real
&& !OVL_TYPE_MERGE(type
));
272 if (od
->is_real
&& OVL_TYPE_MERGE(type
))
276 static int ovl_dir_read_merged(struct dentry
*dentry
, struct list_head
*list
)
279 struct path realpath
;
280 struct ovl_readdir_data rdd
= {
281 .ctx
.actor
= ovl_fill_merge
,
289 for (idx
= 0; idx
!= -1; idx
= next
) {
290 next
= ovl_path_next(idx
, dentry
, &realpath
);
293 err
= ovl_dir_read(&realpath
, &rdd
);
298 * Insert lowest layer entries before upper ones, this
299 * allows offsets to be reasonably constant
301 list_add(&rdd
.middle
, rdd
.list
);
302 rdd
.is_lowest
= true;
303 err
= ovl_dir_read(&realpath
, &rdd
);
304 list_del(&rdd
.middle
);
310 static void ovl_seek_cursor(struct ovl_dir_file
*od
, loff_t pos
)
315 list_for_each(p
, &od
->cache
->entries
) {
320 /* Cursor is safe since the cache is stable */
324 static struct ovl_dir_cache
*ovl_cache_get(struct dentry
*dentry
)
327 struct ovl_dir_cache
*cache
;
329 cache
= ovl_dir_cache(dentry
);
330 if (cache
&& ovl_dentry_version_get(dentry
) == cache
->version
) {
334 ovl_set_dir_cache(dentry
, NULL
);
336 cache
= kzalloc(sizeof(struct ovl_dir_cache
), GFP_KERNEL
);
338 return ERR_PTR(-ENOMEM
);
341 INIT_LIST_HEAD(&cache
->entries
);
343 res
= ovl_dir_read_merged(dentry
, &cache
->entries
);
345 ovl_cache_free(&cache
->entries
);
350 cache
->version
= ovl_dentry_version_get(dentry
);
351 ovl_set_dir_cache(dentry
, cache
);
356 static int ovl_iterate(struct file
*file
, struct dir_context
*ctx
)
358 struct ovl_dir_file
*od
= file
->private_data
;
359 struct dentry
*dentry
= file
->f_path
.dentry
;
360 struct ovl_cache_entry
*p
;
366 return iterate_dir(od
->realfile
, ctx
);
369 struct ovl_dir_cache
*cache
;
371 cache
= ovl_cache_get(dentry
);
373 return PTR_ERR(cache
);
376 ovl_seek_cursor(od
, ctx
->pos
);
379 while (od
->cursor
!= &od
->cache
->entries
) {
380 p
= list_entry(od
->cursor
, struct ovl_cache_entry
, l_node
);
382 if (!dir_emit(ctx
, p
->name
, p
->len
, p
->ino
, p
->type
))
384 od
->cursor
= p
->l_node
.next
;
390 static loff_t
ovl_dir_llseek(struct file
*file
, loff_t offset
, int origin
)
393 struct ovl_dir_file
*od
= file
->private_data
;
395 inode_lock(file_inode(file
));
400 res
= vfs_llseek(od
->realfile
, offset
, origin
);
401 file
->f_pos
= od
->realfile
->f_pos
;
407 offset
+= file
->f_pos
;
417 if (offset
!= file
->f_pos
) {
418 file
->f_pos
= offset
;
420 ovl_seek_cursor(od
, offset
);
425 inode_unlock(file_inode(file
));
430 static int ovl_dir_fsync(struct file
*file
, loff_t start
, loff_t end
,
433 struct ovl_dir_file
*od
= file
->private_data
;
434 struct dentry
*dentry
= file
->f_path
.dentry
;
435 struct file
*realfile
= od
->realfile
;
438 * Need to check if we started out being a lower dir, but got copied up
440 if (!od
->is_upper
&& OVL_TYPE_UPPER(ovl_path_type(dentry
))) {
441 struct inode
*inode
= file_inode(file
);
443 realfile
= lockless_dereference(od
->upperfile
);
445 struct path upperpath
;
447 ovl_path_upper(dentry
, &upperpath
);
448 realfile
= ovl_path_open(&upperpath
, O_RDONLY
);
449 smp_mb__before_spinlock();
451 if (!od
->upperfile
) {
452 if (IS_ERR(realfile
)) {
454 return PTR_ERR(realfile
);
456 od
->upperfile
= realfile
;
458 /* somebody has beaten us to it */
459 if (!IS_ERR(realfile
))
461 realfile
= od
->upperfile
;
467 return vfs_fsync_range(realfile
, start
, end
, datasync
);
470 static int ovl_dir_release(struct inode
*inode
, struct file
*file
)
472 struct ovl_dir_file
*od
= file
->private_data
;
476 ovl_cache_put(od
, file
->f_path
.dentry
);
487 static int ovl_dir_open(struct inode
*inode
, struct file
*file
)
489 struct path realpath
;
490 struct file
*realfile
;
491 struct ovl_dir_file
*od
;
492 enum ovl_path_type type
;
494 od
= kzalloc(sizeof(struct ovl_dir_file
), GFP_KERNEL
);
498 type
= ovl_path_real(file
->f_path
.dentry
, &realpath
);
499 realfile
= ovl_path_open(&realpath
, file
->f_flags
);
500 if (IS_ERR(realfile
)) {
502 return PTR_ERR(realfile
);
504 od
->realfile
= realfile
;
505 od
->is_real
= !OVL_TYPE_MERGE(type
);
506 od
->is_upper
= OVL_TYPE_UPPER(type
);
507 file
->private_data
= od
;
512 const struct file_operations ovl_dir_operations
= {
513 .read
= generic_read_dir
,
514 .open
= ovl_dir_open
,
515 .iterate
= ovl_iterate
,
516 .llseek
= ovl_dir_llseek
,
517 .fsync
= ovl_dir_fsync
,
518 .release
= ovl_dir_release
,
521 int ovl_check_empty_dir(struct dentry
*dentry
, struct list_head
*list
)
524 struct ovl_cache_entry
*p
;
526 err
= ovl_dir_read_merged(dentry
, list
);
532 list_for_each_entry(p
, list
, l_node
) {
536 if (p
->name
[0] == '.') {
539 if (p
->len
== 2 && p
->name
[1] == '.')
549 void ovl_cleanup_whiteouts(struct dentry
*upper
, struct list_head
*list
)
551 struct ovl_cache_entry
*p
;
553 inode_lock_nested(upper
->d_inode
, I_MUTEX_CHILD
);
554 list_for_each_entry(p
, list
, l_node
) {
555 struct dentry
*dentry
;
560 dentry
= lookup_one_len(p
->name
, upper
, p
->len
);
561 if (IS_ERR(dentry
)) {
562 pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
563 upper
->d_name
.name
, p
->len
, p
->name
,
564 (int) PTR_ERR(dentry
));
568 ovl_cleanup(upper
->d_inode
, dentry
);
571 inode_unlock(upper
->d_inode
);
574 static int ovl_check_d_type(struct dir_context
*ctx
, const char *name
,
575 int namelen
, loff_t offset
, u64 ino
,
578 struct ovl_readdir_data
*rdd
=
579 container_of(ctx
, struct ovl_readdir_data
, ctx
);
581 /* Even if d_type is not supported, DT_DIR is returned for . and .. */
582 if (!strncmp(name
, ".", namelen
) || !strncmp(name
, "..", namelen
))
585 if (d_type
!= DT_UNKNOWN
)
586 rdd
->d_type_supported
= true;
592 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
593 * if error is encountered.
595 int ovl_check_d_type_supported(struct path
*realpath
)
598 struct ovl_readdir_data rdd
= {
599 .ctx
.actor
= ovl_check_d_type
,
600 .d_type_supported
= false,
603 err
= ovl_dir_read(realpath
, &rdd
);
607 return rdd
.d_type_supported
;
610 static void ovl_workdir_cleanup_recurse(struct path
*path
, int level
)
613 struct inode
*dir
= path
->dentry
->d_inode
;
615 struct ovl_cache_entry
*p
;
616 struct ovl_readdir_data rdd
= {
617 .ctx
.actor
= ovl_fill_merge
,
624 err
= ovl_dir_read(path
, &rdd
);
628 inode_lock_nested(dir
, I_MUTEX_PARENT
);
629 list_for_each_entry(p
, &list
, l_node
) {
630 struct dentry
*dentry
;
632 if (p
->name
[0] == '.') {
635 if (p
->len
== 2 && p
->name
[1] == '.')
638 dentry
= lookup_one_len(p
->name
, path
->dentry
, p
->len
);
642 ovl_workdir_cleanup(dir
, path
->mnt
, dentry
, level
);
647 ovl_cache_free(&list
);
650 void ovl_workdir_cleanup(struct inode
*dir
, struct vfsmount
*mnt
,
651 struct dentry
*dentry
, int level
)
655 if (!d_is_dir(dentry
) || level
> 1) {
656 ovl_cleanup(dir
, dentry
);
660 err
= ovl_do_rmdir(dir
, dentry
);
662 struct path path
= { .mnt
= mnt
, .dentry
= dentry
};
665 ovl_workdir_cleanup_recurse(&path
, level
+ 1);
666 inode_lock_nested(dir
, I_MUTEX_PARENT
);
667 ovl_cleanup(dir
, dentry
);
671 int ovl_indexdir_cleanup(struct dentry
*dentry
, struct vfsmount
*mnt
,
672 struct path
*lowerstack
, unsigned int numlower
)
675 struct inode
*dir
= dentry
->d_inode
;
676 struct path path
= { .mnt
= mnt
, .dentry
= dentry
};
678 struct ovl_cache_entry
*p
;
679 struct ovl_readdir_data rdd
= {
680 .ctx
.actor
= ovl_fill_merge
,
687 err
= ovl_dir_read(&path
, &rdd
);
691 inode_lock_nested(dir
, I_MUTEX_PARENT
);
692 list_for_each_entry(p
, &list
, l_node
) {
693 struct dentry
*index
;
695 if (p
->name
[0] == '.') {
698 if (p
->len
== 2 && p
->name
[1] == '.')
701 index
= lookup_one_len(p
->name
, dentry
, p
->len
);
703 err
= PTR_ERR(index
);
706 err
= ovl_verify_index(index
, lowerstack
, numlower
);
710 err
= ovl_cleanup(dir
, index
);
718 ovl_cache_free(&list
);
720 pr_err("overlayfs: failed index dir cleanup (%i)\n", err
);