struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
- bool is_read = (iov_iter_rw(iter) == READ);
+ bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
int ret;
bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
- dio->is_sync = is_sync_kiocb(iocb);
+ dio->is_sync = is_sync = is_sync_kiocb(iocb);
if (dio->is_sync)
dio->waiter = current;
else
}
blk_finish_plug(&plug);
- if (!dio->is_sync)
+ if (!is_sync)
return -EIOCBQUEUED;
for (;;) {
void *holder)
{
struct block_device *bdev;
+ int perm = 0;
int err;
- bdev = lookup_bdev(path);
+ if (mode & FMODE_READ)
+ perm |= MAY_READ;
+ if (mode & FMODE_WRITE)
+ perm |= MAY_WRITE;
+ bdev = lookup_bdev(path, perm);
if (IS_ERR(bdev))
return bdev;
if (bdev == NULL)
return -ENOMEM;
+ /*
+ * A negative i_writecount for bdev->bd_inode means that the bdev
+ * or one of its partitions is mounted in a user namespace. Deny
+ * writing for non-root in this case, otherwise an unprivileged
+ * user can attack the kernel by modifying the backing store of a
+ * mounted filesystem.
+ */
+ if ((filp->f_mode & FMODE_WRITE) &&
+ !file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN) &&
+ !atomic_inc_unless_negative(&bdev->bd_inode->i_writecount)) {
+ bdput(bdev);
+ return -EBUSY;
+ }
+
filp->f_mapping = bdev->bd_inode->i_mapping;
return blkdev_get(bdev, filp->f_mode, filp);
/*
 * Release handler for a block device file.
 *
 * Resolves the block_device behind @filp via I_BDEV(bdev_file_inode(filp)).
 * When the file was opened with FMODE_WRITE and
 * file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN) is false, the
 * i_writecount of bdev->bd_inode is decremented; this is the same
 * condition under which the open-side code in this file takes its
 * atomic_inc_unless_negative() on that counter, so the two appear to be
 * a matched pair.
 * NOTE(review): confirm no open failure path leaves the count elevated.
 *
 * The device reference is then dropped with blkdev_put() using the
 * file's f_mode.  @inode is not used here.  Always returns 0.
 */
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
+ if (filp->f_mode & FMODE_WRITE &&
+ !file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN))
+ atomic_dec(&bdev->bd_inode->i_writecount);
blkdev_put(bdev, filp->f_mode);
return 0;
}
/**
* lookup_bdev - lookup a struct block_device by name
* @pathname: special file representing the block device
+ * @mask: rights to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
*
* Get a reference to the blockdevice at @pathname in the current
* namespace if possible and return it. Return ERR_PTR(error)
- * otherwise.
+ * otherwise. If @mask is non-zero and the caller does not have
+ * %CAP_SYS_ADMIN, check for access rights to the inode at @pathname.
*/
-struct block_device *lookup_bdev(const char *pathname)
+struct block_device *lookup_bdev(const char *pathname, int mask)
{
struct block_device *bdev;
struct inode *inode;
return ERR_PTR(error);
inode = d_backing_inode(path.dentry);
+ if (mask != 0 && !capable(CAP_SYS_ADMIN)) {
+ error = __inode_permission(inode, mask);
+ if (error)
+ goto fail;
+ }
error = -ENOTBLK;
if (!S_ISBLK(inode->i_mode))
goto fail;