int Client::CommandHook::call(
std::string_view command,
const cmdmap_t& cmdmap,
+ const bufferlist&,
Formatter *f,
std::ostream& errss,
bufferlist& out)
issued |= in->caps_dirty();
int new_issued = ~issued & (int)st->cap.caps;
+ bool need_snapdir_attr_refresh = false;
if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
!(issued & CEPH_CAP_AUTH_EXCL)) {
in->mode = st->mode;
in->btime = st->btime;
in->snap_btime = st->snap_btime;
in->snap_metadata = st->snap_metadata;
+ need_snapdir_attr_refresh = true;
}
if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
}
if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
+ need_snapdir_attr_refresh = true;
update_inode_file_time(in, issued, st->time_warp_seq,
st->ctime, st->mtime, st->atime);
}
auto p = st->xattrbl.cbegin();
decode(in->xattrs, p);
in->xattr_version = st->xattr_version;
+ need_snapdir_attr_refresh = true;
}
if (st->inline_version > in->inline_version) {
}
/* always take a newer change attr */
+ ldout(cct, 12) << __func__ << " client inode change_attr: " << in->change_attr << " , mds inodestat change_attr: " << st->change_attr << dendl;
if (st->change_attr > in->change_attr)
in->change_attr = st->change_attr;
}
in->fscrypt = st->fscrypt;
+ if (need_snapdir_attr_refresh && in->is_dir() && in->snapid == CEPH_NOSNAP) {
+ vinodeno_t vino(in->ino, CEPH_SNAPDIR);
+ if (inode_map.count(vino)) {
+ refresh_snapdir_attrs(inode_map[vino], in);
+ }
+ }
+
return in;
}
const UserPerm& perms,
InodeRef *ptarget, bool *pcreated,
mds_rank_t use_mds,
- bufferlist *pdirbl)
+ bufferlist *pdirbl,
+ size_t feature_needed)
{
int r = 0;
session = mds_sessions.at(mds);
}
+ if (feature_needed != ULONG_MAX && !session->mds_features.test(feature_needed)) {
+ request->abort(-CEPHFS_EOPNOTSUPP);
+ break;
+ }
+
// send request.
send_request(request, session.get());
request->caller_cond = nullptr;
// did we get a reply?
- if (request->reply)
+ if (request->reply)
break;
}
switch (m->get_op()) {
case CEPH_SESSION_OPEN:
{
+ if (session->state == MetaSession::STATE_OPEN) {
+ ldout(cct, 10) << "mds." << from << " already opened, ignore it"
+ << dendl;
+ return;
+ }
+ /*
+ * The connection maybe broken and the session in client side
+ * has been reinitialized, need to update the seq anyway.
+ */
+ if (!session->seq && m->get_seq())
+ session->seq = m->get_seq();
+
feature_bitset_t missing_features(CEPHFS_FEATURES_CLIENT_REQUIRED);
missing_features -= m->supported_features;
if (!missing_features.empty()) {
std::pair<int, bool> Client::_do_remount(bool retry_on_error)
{
- uint64_t max_retries = cct->_conf.get_val<uint64_t>("mds_max_retries_on_remount_failure");
+ uint64_t max_retries = cct->_conf.get_val<uint64_t>("client_max_retries_on_remount_failure");
bool abort_on_failure = false;
errno = 0;
explicit C_Client_Remount(Client *c) : client(c) {}
void finish(int r) override {
ceph_assert(r == 0);
- client->_do_remount(true);
+ auto result = client->_do_remount(true);
+ if (result.second) {
+ ceph_abort();
+ }
}
};
mref_writer.update_state(CLIENT_UNMOUNTED);
+ /*
+ * Stop the remount_queue before clearing the mountpoint memory
+ * to avoid possible use-after-free bug.
+ */
+ if (remount_cb) {
+ ldout(cct, 10) << "unmount stopping remount finisher" << dendl;
+ remount_finisher.wait_for_empty();
+ remount_finisher.stop();
+ remount_cb = nullptr;
+ }
+
ldout(cct, 2) << "unmounted." << dendl;
}
req->set_string2(xattr_name);
bufferlist bl;
- int res = make_request(req, perms, nullptr, nullptr, rank, &bl);
+ int res = make_request(req, perms, nullptr, nullptr, rank, &bl,
+ CEPHFS_FEATURE_OP_GETVXATTR);
ldout(cct, 10) << __func__ << " result=" << res << dendl;
if (res < 0) {
+ if (res == -CEPHFS_EOPNOTSUPP) {
+ return -CEPHFS_ENODATA;
+ }
return res;
}
if (!mask) {
in->change_attr++;
+ if (in->is_dir() && in->snapid == CEPH_NOSNAP) {
+ vinodeno_t vino(in->ino, CEPH_SNAPDIR);
+ if (inode_map.count(vino)) {
+ refresh_snapdir_attrs(inode_map[vino], in);
+ }
+ }
return 0;
}
stat_set_mtime_sec(st, in->mtime.sec());
stat_set_mtime_nsec(st, in->mtime.nsec());
if (in->is_dir()) {
- if (cct->_conf->client_dirsize_rbytes)
+ if (cct->_conf->client_dirsize_rbytes) {
st->st_size = in->rstat.rbytes;
- else
+ } else if (in->snapid == CEPH_SNAPDIR) {
+ SnapRealm *realm = get_snap_realm_maybe(in->vino().ino);
+ if (realm) {
+ st->st_size = realm->my_snaps.size();
+ put_snap_realm(realm);
+ }
+ } else {
st->st_size = in->dirstat.size();
+ }
// The Windows "stat" structure provides just a subset of the fields that are
// available on Linux.
#ifndef _WIN32
{
ldout(cct, 10) << __func__ << " on " << in->ino << " snap/dev" << in->snapid
<< " mode 0" << oct << in->mode << dec
- << " mtime " << in->mtime << " ctime " << in->ctime << dendl;
+ << " mtime " << in->mtime << " ctime " << in->ctime << " change_attr " << in->change_attr << dendl;
memset(stx, 0, sizeof(struct ceph_statx));
/*
in->mtime.to_timespec(&stx->stx_mtime);
if (in->is_dir()) {
- if (cct->_conf->client_dirsize_rbytes)
+ if (cct->_conf->client_dirsize_rbytes) {
stx->stx_size = in->rstat.rbytes;
- else
+ } else if (in->snapid == CEPH_SNAPDIR) {
+ SnapRealm *realm = get_snap_realm_maybe(in->vino().ino);
+ if (realm) {
+ stx->stx_size = realm->my_snaps.size();
+ put_snap_realm(realm);
+ }
+ } else {
stx->stx_size = in->dirstat.size();
+ }
stx->stx_blocks = 1;
} else {
stx->stx_size = in->size;
int want, have = 0;
bool movepos = false;
- std::unique_ptr<C_SaferCond> onuninline;
int64_t rc = 0;
const auto& conf = cct->_conf;
Inode *in = f->inode.get();
have &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO);
if (in->inline_version < CEPH_INLINE_NONE) {
- if (!(have & CEPH_CAP_FILE_CACHE)) {
- onuninline.reset(new C_SaferCond("Client::_read_uninline_data flock"));
- uninline_data(in, onuninline.get());
- } else {
- uint32_t len = in->inline_data.length();
- uint64_t endoff = offset + size;
- if (endoff > in->size)
- endoff = in->size;
-
- if (offset < len) {
- if (endoff <= len) {
- bl->substr_of(in->inline_data, offset, endoff - offset);
- } else {
- bl->substr_of(in->inline_data, offset, len - offset);
- bl->append_zero(endoff - len);
- }
- rc = endoff - offset;
- } else if ((uint64_t)offset < endoff) {
- bl->append_zero(endoff - offset);
- rc = endoff - offset;
+ uint32_t len = in->inline_data.length();
+ uint64_t endoff = offset + size;
+ if (endoff > in->size)
+ endoff = in->size;
+
+ if (offset < len) {
+ if (endoff <= len) {
+ bl->substr_of(in->inline_data, offset, endoff - offset);
} else {
- rc = 0;
+ bl->substr_of(in->inline_data, offset, len - offset);
+ bl->append_zero(endoff - len);
}
- goto success;
+ rc = endoff - offset;
+ } else if ((uint64_t)offset < endoff) {
+ bl->append_zero(endoff - offset);
+ rc = endoff - offset;
+ } else {
+ rc = 0;
}
+ goto success;
}
if (!conf->client_debug_force_sync_read &&
done:
// done!
-
- if (onuninline) {
- client_lock.unlock();
- int ret = onuninline->wait();
- client_lock.lock();
- if (ret >= 0 || ret == -CEPHFS_ECANCELED) {
- in->inline_data.clear();
- in->inline_version = CEPH_INLINE_NONE;
- in->mark_caps_dirty(CEPH_CAP_FILE_WR);
- check_caps(in, 0);
- } else
- rc = ret;
- }
if (have) {
put_cap_ref(in, CEPH_CAP_FILE_RD);
}
can_invalidate_dentries = can_invalidate;
+ /*
+ * Force to use the old and slow method to invalidate the dcache
+ * if the euid is non-root, or the remount may fail with return
+ * code 1 or 32.
+ */
+ uid_t euid = geteuid();
+ ldout(cct, 10) << "euid: " << euid << dendl;
+ if (euid != 0) {
+ can_invalidate_dentries = true;
+ }
+
if (can_invalidate_dentries) {
ceph_assert(dentry_invalidate_cb);
ldout(cct, 1) << "using dentry_invalidate_cb" << dendl;
// =========================================
// low level
+void Client::refresh_snapdir_attrs(Inode *in, Inode *diri) {
+ ldout(cct, 10) << __func__ << ": snapdir inode=" << *in
+ << ", inode=" << *diri << dendl;
+ in->ino = diri->ino;
+ in->snapid = CEPH_SNAPDIR;
+ in->mode = diri->mode;
+ in->uid = diri->uid;
+ in->gid = diri->gid;
+ in->nlink = 1;
+ in->mtime = diri->mtime;
+ in->ctime = diri->ctime;
+ in->btime = diri->btime;
+ in->atime = diri->atime;
+ in->size = diri->size;
+ in->change_attr = diri->change_attr;
+
+ in->dirfragtree.clear();
+ in->snapdir_parent = diri;
+ // copy posix acls to snapshotted inode
+ in->xattrs.clear();
+ for (auto &[xattr_key, xattr_value] : diri->xattrs) {
+ if (xattr_key.rfind("system.", 0) == 0) {
+ in->xattrs[xattr_key] = xattr_value;
+ }
+ }
+}
+
Inode *Client::open_snapdir(Inode *diri)
{
Inode *in;
vinodeno_t vino(diri->ino, CEPH_SNAPDIR);
if (!inode_map.count(vino)) {
in = new Inode(this, vino, &diri->layout);
-
- in->ino = diri->ino;
- in->snapid = CEPH_SNAPDIR;
- in->mode = diri->mode;
- in->uid = diri->uid;
- in->gid = diri->gid;
- in->nlink = 1;
- in->mtime = diri->mtime;
- in->ctime = diri->ctime;
- in->btime = diri->btime;
- in->atime = diri->atime;
- in->size = diri->size;
- in->change_attr = diri->change_attr;
-
- in->dirfragtree.clear();
- in->snapdir_parent = diri;
+ refresh_snapdir_attrs(in, diri);
diri->flags |= I_SNAPDIR_OPEN;
inode_map[vino] = in;
if (use_faked_inos())