+ /* we'll free this at cg_release */
+ file_info = malloc(sizeof(*file_info));
+ if (!file_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ file_info->controller = must_copy_string(controller);
+ file_info->cgroup = must_copy_string(path1);
+ file_info->file = must_copy_string(path2);
+ file_info->type = LXC_TYPE_CGFILE;
+ file_info->buf = NULL;
+ file_info->buflen = 0;
+
+ fi->fh = (unsigned long)file_info;
+ ret = 0;
+
+out:
+ free(cgdir);
+ return ret;
+}
+
+/*
+ * cg_release - release callback for an open cgroup file.
+ *
+ * @path: unused here.
+ * @fi:   FUSE file info; fi->fh carries the struct file_info pointer that
+ *        was stored there when the file was opened.
+ *
+ * Passes that pointer to do_release_file_info() and always returns 0.
+ */
+static int cg_release(const char *path, struct fuse_file_info *fi)
+{
+	do_release_file_info((struct file_info *)fi->fh);
+	return 0;
+}
+
+/* Events that mean "there is something to read, or the peer went away". */
+#define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP )
+
+/*
+ * wait_for_sock - wait until @sock is readable or has been hung up.
+ *
+ * @sock:    file descriptor to watch (registered for POLLIN_SET).
+ * @timeout: how long to wait, in seconds.  The deadline is computed from
+ *           time(), so it has one-second granularity.
+ *
+ * Returns true as soon as epoll_wait() reports an event on @sock.
+ * Returns false on timeout or on any error.  On timeout errno is set to 0;
+ * when epoll_wait() fails, errno holds its error.
+ */
+static bool wait_for_sock(int sock, int timeout)
+{
+	struct epoll_event ev;
+	time_t starttime, now;
+	int epfd, ret, deltatime, saved_errno;
+
+	if ((starttime = time(NULL)) == (time_t)-1)
+		return false;
+
+	if ((epfd = epoll_create(1)) < 0) {
+		fprintf(stderr, "Failed to create epoll socket: %m\n");
+		return false;
+	}
+
+	ev.events = POLLIN_SET;
+	ev.data.fd = sock;
+	if (epoll_ctl(epfd, EPOLL_CTL_ADD, sock, &ev) < 0) {
+		fprintf(stderr, "Failed adding socket to epoll: %m\n");
+		close(epfd);
+		return false;
+	}
+
+again:
+	/* Recompute the remaining time on every pass so that EINTR
+	 * restarts do not extend the overall deadline. */
+	if ((now = time(NULL)) == (time_t)-1) {
+		close(epfd);
+		return false;
+	}
+
+	deltatime = (int)((starttime + timeout) - now);
+	if (deltatime < 0) { // timeout
+		errno = 0;
+		close(epfd);
+		return false;
+	}
+	ret = epoll_wait(epfd, &ev, 1, 1000*deltatime + 1);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+	/* epoll_wait() does not touch errno when it simply times out, so
+	 * only keep errno for a real failure; report a timeout as 0, the
+	 * same way the deadline check above does. */
+	saved_errno = ret < 0 ? errno : 0;
+	close(epfd);
+
+	if (ret <= 0) {
+		errno = saved_errno;
+		return false;
+	}
+	return true;
+}
+
+/*
+ * msgrecv - receive from @sockfd once it becomes ready.
+ *
+ * Calls wait_for_sock() with a timeout of 2, then does a non-blocking
+ * recv() of up to @len bytes into @buf.
+ *
+ * Returns the result of recv(), or -1 if wait_for_sock() returned false.
+ */
+static int msgrecv(int sockfd, void *buf, size_t len)
+{
+	bool ready = wait_for_sock(sockfd, 2);
+
+	if (ready)
+		return recv(sockfd, buf, len, MSG_DONTWAIT);
+	return -1;
+}
+
+/*
+ * send_creds - send one byte plus an SCM_CREDENTIALS control message.
+ *
+ * @sock:      socket to send on.
+ * @cred:      credentials copied into the control message.
+ * @v:         the single data byte sent alongside the credentials.
+ * @pingfirst: when true, first wait (via msgrecv) for one byte from the
+ *             peer before sending.
+ *
+ * Returns SEND_CREDS_OK on success, SEND_CREDS_NOTSK when sendmsg() fails
+ * with ESRCH, and SEND_CREDS_FAIL for every other failure (including not
+ * receiving the ping byte).
+ */
+static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
+{
+	struct msghdr msg = { 0 };
+	struct iovec iov;
+	struct cmsghdr *cmsg;
+	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
+	char buf[1];
+
+	if (pingfirst) {
+		if (msgrecv(sock, buf, 1) != 1) {
+			fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
+				__func__);
+			return SEND_CREDS_FAIL;
+		}
+	}
+
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_CREDENTIALS;
+	memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));
+
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+
+	buf[0] = v;
+	iov.iov_base = buf;
+	iov.iov_len = sizeof(buf);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	if (sendmsg(sock, &msg, 0) < 0) {
+		/* Save errno before logging: fprintf()/strerror() are
+		 * allowed to modify it, and we still need to classify
+		 * the failure afterwards. */
+		int saved_errno = errno;
+
+		fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
+			strerror(saved_errno));
+		/* ESRCH (3 on Linux): presumably the pid in @cred no
+		 * longer names a task - reported separately so callers
+		 * can tell it apart from other failures. */
+		if (saved_errno == ESRCH)
+			return SEND_CREDS_NOTSK;
+		return SEND_CREDS_FAIL;
+	}
+
+	return SEND_CREDS_OK;
+}
+
+/*
+ * recv_creds - receive one byte plus an SCM_CREDENTIALS message.
+ *
+ * @sock: socket to receive on; SO_PASSCRED is enabled on it first.
+ * @cred: filled with the received credentials.  Preset to -1/-1/-1 and
+ *        left that way if no matching control message arrives.
+ * @v:    receives the data byte that came with the message (preset to '1').
+ *
+ * Writes a single '1' byte to @sock before waiting, then waits with
+ * wait_for_sock() (timeout 2) and does a non-blocking recvmsg().
+ *
+ * Returns false if setsockopt(), the write, the wait or recvmsg() fails;
+ * true otherwise.
+ */
+static bool recv_creds(int sock, struct ucred *cred, char *v)
+{
+	char byte[1] = { '1' };
+	char control[CMSG_SPACE(sizeof(*cred))];
+	struct iovec vec = {
+		.iov_base = byte,
+		.iov_len = sizeof(byte),
+	};
+	struct msghdr hdr = {
+		.msg_name = NULL,
+		.msg_namelen = 0,
+		.msg_iov = &vec,
+		.msg_iovlen = 1,
+		.msg_control = control,
+		.msg_controllen = sizeof(control),
+	};
+	struct cmsghdr *cm;
+	int enable = 1;
+
+	/* Defaults reported when nothing usable is received. */
+	*v = '1';
+	cred->pid = -1;
+	cred->uid = -1;
+	cred->gid = -1;
+
+	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable)) == -1) {
+		fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
+		return false;
+	}
+
+	/* Ping the peer: send_creds(..., pingfirst = true) waits for this
+	 * byte before it sends. */
+	if (write(sock, byte, 1) != 1) {
+		fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
+		return false;
+	}
+
+	if (!wait_for_sock(sock, 2)) {
+		fprintf(stderr, "Timed out waiting for scm_cred: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	if (recvmsg(sock, &hdr, MSG_DONTWAIT) < 0) {
+		fprintf(stderr, "Failed to receive scm_cred: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	/* Only accept a control message of exactly the expected kind and size. */
+	cm = CMSG_FIRSTHDR(&hdr);
+	if (cm &&
+	    cm->cmsg_level == SOL_SOCKET &&
+	    cm->cmsg_type == SCM_CREDENTIALS &&
+	    cm->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
+		memcpy(cred, CMSG_DATA(cm), sizeof(*cred));
+
+	*v = byte[0];
+	return true;
+}
+
+
+/*
+ * pid_to_ns - pid translation loop; never returns.
+ *
+ * Repeatedly receives a ucred over @sock and writes the pid field it got
+ * back over the same socket as a raw pid_t.  The intent is that this
+ * shifts the pid from the sender's pidns into the pidns this process
+ * runs in (tpid's pidns).
+ *
+ * The loop ends, and the process exits with status 0, when recv_creds()
+ * fails or the received data byte is '1'.  A short write exits with
+ * status 1.  @tpid is not used here.
+ */
+static void pid_to_ns(int sock, pid_t tpid)
+{
+	struct ucred cred;
+	char v = '0';
+
+	while (recv_creds(sock, &cred, &v) && v != '1') {
+		ssize_t written = write(sock, &cred.pid, sizeof(pid_t));
+
+		if (written != sizeof(pid_t))
+			_exit(1);
+	}
+	_exit(0);
+}
+
+/*
+ * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
+ * in your old pidns.  Only children which you fork will be in the target
+ * pidns.  So the pid_to_ns_wrapper does the setns, then forks a child to
+ * actually convert pids.
+ *
+ * @sock: socket handed on to pid_to_ns() in the child.
+ * @tpid: pid whose /proc/<tpid>/ns/pid namespace is entered.
+ *
+ * Never returns.  The child acks over a pipe with a '1' byte and then runs
+ * pid_to_ns().  This process exits 0 once it has seen the ack and
+ * wait_for_pid() on the child succeeded; it exits 1 on any failure.
+ */
+static void pid_to_ns_wrapper(int sock, pid_t tpid)
+{
+	int newnsfd = -1, ret, cpipe[2];
+	char fnam[100];
+	pid_t cpid;
+	char v;
+
+	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
+	if (ret < 0 || (size_t)ret >= sizeof(fnam))
+		_exit(1);
+	newnsfd = open(fnam, O_RDONLY);
+	if (newnsfd < 0)
+		_exit(1);
+	if (setns(newnsfd, 0) < 0)
+		_exit(1);
+	close(newnsfd);
+
+	if (pipe(cpipe) < 0)
+		_exit(1);
+
+	cpid = fork();
+	if (cpid < 0)
+		_exit(1);
+
+	if (!cpid) {
+		char b = '1';
+		close(cpipe[0]);
+		if (write(cpipe[1], &b, sizeof(char)) < 0) {
+			fprintf(stderr, "%s (child): erorr on write: %s\n",
+				__func__, strerror(errno));
+		}
+		close(cpipe[1]);
+		pid_to_ns(sock, tpid);
+		_exit(1); // not reached
+	}
+
+	// Drop our copy of the write end: otherwise the pipe can never
+	// report hang-up, and a child that dies before acking would only
+	// be noticed through the timeout below.
+	close(cpipe[1]);
+
+	// give the child 1 second to be done forking and
+	// write its ack
+	if (!wait_for_sock(cpipe[0], 1))
+		_exit(1);
+	ret = read(cpipe[0], &v, 1);
+	close(cpipe[0]);
+	if (ret != sizeof(char) || v != '1')
+		_exit(1);
+
+	if (!wait_for_pid(cpid))
+		_exit(1);
+	_exit(0);
+}
+
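+/*
+ * To read cgroup files with a particular pid, we read the raw value with
+ * cgfs_get_value, then fork a child which setns()es into the target pidns
+ * and forks again - that grandchild is the first to really be in the
+ * target ns.  Each pid is passed to it over a socketpair as a ucred, and
+ * the translated pid is read back and appended to the returned data.
+ */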
+static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
+{
+ int sock[2] = {-1, -1};
+ char *tmpdata = NULL;
+ int ret;
+ pid_t qpid, cpid = -1;
+ bool answer = false;
+ char v = '0';
+ struct ucred cred;
+ size_t sz = 0, asz = 0;
+
+ if (!cgfs_get_value(contrl, cg, file, &tmpdata))
+ return false;
+
+ /*
+ * Now we read the pids from returned data one by one, pass
+ * them into a child in the target namespace, read back the
+ * translated pids, and put them into our to-return data
+ */
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
+ perror("socketpair");
+ free(tmpdata);
+ return false;
+ }
+
+ cpid = fork();
+ if (cpid == -1)
+ goto out;
+
+ if (!cpid) // child - exits when done
+ pid_to_ns_wrapper(sock[1], tpid);
+
+ char *ptr = tmpdata;
+ cred.uid = 0;
+ cred.gid = 0;
+ while (sscanf(ptr, "%d\n", &qpid) == 1) {
+ cred.pid = qpid;
+ ret = send_creds(sock[0], &cred, v, true);
+
+ if (ret == SEND_CREDS_NOTSK)
+ goto next;
+ if (ret == SEND_CREDS_FAIL)
+ goto out;
+
+ // read converted results
+ if (!wait_for_sock(sock[0], 2)) {
+ fprintf(stderr, "%s: timed out waiting for pid from child: %s\n",
+ __func__, strerror(errno));
+ goto out;
+ }
+ if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
+ fprintf(stderr, "%s: error reading pid from child: %s\n",
+ __func__, strerror(errno));
+ goto out;
+ }
+ must_strcat_pid(d, &sz, &asz, qpid);
+next:
+ ptr = strchr(ptr, '\n');
+ if (!ptr)
+ break;
+ ptr++;
+ }
+
+ cred.pid = getpid();
+ v = '1';
+ if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
+ // failed to ask child to exit
+ fprintf(stderr, "%s: failed to ask child to exit: %s\n",
+ __func__, strerror(errno));
+ goto out;
+ }
+
+ answer = true;
+
+out:
+ free(tmpdata);
+ if (cpid != -1)
+ wait_for_pid(cpid);
+ if (sock[0] != -1) {
+ close(sock[0]);
+ close(sock[1]);
+ }
+ return answer;