git.proxmox.com Git - mirror_lxcfs.git/commitdiff
sysfs: don't mask cpus in /sys/devices/system/cpu
author: Tycho Andersen <tycho@tycho.pizza>
Thu, 27 Oct 2022 16:23:08 +0000 (10:23 -0600)
committer: Tycho Andersen <tycho@tycho.pizza>
Wed, 4 Jan 2023 16:32:19 +0000 (09:32 -0700)
The kernel does not mask the cpu%d dirs when they are offlined:

(root) /sys/devices/system/cpu # cat online
0-7
(root) /sys/devices/system/cpu # chcpu -d 4
CPU 4 disabled
(root) /sys/devices/system/cpu # cat online
0-3,5-7
(root) /sys/devices/system/cpu # cat offline
4
(root) /sys/devices/system/cpu # ls -al
total 0
drwxr-xr-x 16 root root    0 Oct 25 20:42 .
drwxr-xr-x 10 root root    0 Oct 25 20:42 ..
drwxr-xr-x  7 root root    0 Oct 25 20:42 cpu0
drwxr-xr-x  7 root root    0 Oct 25 20:42 cpu1
drwxr-xr-x  7 root root    0 Oct 25 20:42 cpu2
drwxr-xr-x  7 root root    0 Oct 25 20:42 cpu3
drwxr-xr-x  5 root root    0 Oct 25 20:42 cpu4
drwxr-xr-x  7 root root    0 Oct 25 20:42 cpu5
drwxr-xr-x  7 root root    0 Oct 25 20:42 cpu6
drwxr-xr-x  7 root root    0 Oct 25 20:42 cpu7
drwxr-xr-x  2 root root    0 Oct 25 20:43 cpufreq
drwxr-xr-x  2 root root    0 Oct 26 15:19 cpuidle
drwxr-xr-x  2 root root    0 Oct 26 15:19 hotplug
-r--r--r--  1 root root 4096 Oct 25 20:42 isolated
-r--r--r--  1 root root 4096 Oct 25 20:43 kernel_max
-r--r--r--  1 root root 4096 Oct 26 15:19 modalias
-r--r--r--  1 root root 4096 Oct 26 15:19 offline
-r--r--r--  1 root root 4096 Oct 25 20:42 online
-r--r--r--  1 root root 4096 Oct 25 20:43 possible
drwxr-xr-x  2 root root    0 Oct 26 15:19 power
-r--r--r--  1 root root 4096 Oct 25 20:43 present
drwxr-xr-x  2 root root    0 Oct 26 15:19 smt
-rw-r--r--  1 root root 4096 Oct 25 20:42 uevent
drwxr-xr-x  2 root root    0 Oct 26 15:19 vulnerabilities

Let's not mask them in lxcfs either. In particular, we have observed this
masking causing problems with some JVMs' implementation of
Runtime.getRuntime().availableProcessors().

This is a bit of a strange patch: it seems masking this dir was always
incorrect, so we could go back to just not offering it as an lxcfs
endpoint, and having people use sysfs' implementation directly. But maybe
people are expecting it now, so I've left it as a proxy. Perhaps a more
appropriate patch is to just delete it entirely and add an API extension
note?

Signed-off-by: Tycho Andersen <tycho@tycho.pizza>
README.md
src/sysfs_fuse.c
tests/main.sh.in
tests/meson.build
tests/test_sysfs.in [deleted file]

index 50045e02ad4df627e5fdf1657c65919d4c3d9be3..381b4e49f04c249ae9c5e3a7a89bcaf17acaa3a1 100644 (file)
--- a/README.md
+++ b/README.md
@@ -16,7 +16,6 @@ such as:
 /proc/swaps
 /proc/uptime
 /proc/slabinfo
-/sys/devices/system/cpu
 /sys/devices/system/cpu/online
 ```
 
index 5e3b631379af0c0c85ca9090c8e0b4f48fc44c5a..e8671b616e3d721db5aed794efa657e176017662 100644 (file)
 #include "utils.h"
 
 static off_t get_sysfile_size(const char *which);
-/* Create cpumask from cpulist aka turn:
- *
- *     0,2-3
- *
- * into bit array
- *
- *     1 0 1 1
- */
-static int lxc_cpumask(char *buf, __u32 **bitarr, __u32 *last_set_bit)
-{
-       __do_free __u32 *arr_u32 = NULL;
-       __u32 cur_last_set_bit = 0, nbits = 256;
-       __u32 nr_u32;
-       char *token;
-
-       nr_u32 = BITS_TO_LONGS(nbits);
-       arr_u32 = zalloc(nr_u32 * sizeof(__u32));
-       if (!arr_u32)
-               return ret_errno(ENOMEM);
-
-       lxc_iterate_parts(token, buf, ",") {
-               __u32 last_bit, first_bit;
-               char *range;
-
-               errno = 0;
-               first_bit = strtoul(token, NULL, 0);
-               last_bit = first_bit;
-               range = strchr(token, '-');
-               if (range)
-                       last_bit = strtoul(range + 1, NULL, 0);
-
-               if (!(first_bit <= last_bit))
-                       return ret_errno(EINVAL);
-
-               if (last_bit >= nbits) {
-                       __u32 add_bits = last_bit - nbits + 32;
-                       __u32 new_nr_u32;
-                       __u32 *p;
-
-                       new_nr_u32 = BITS_TO_LONGS(nbits + add_bits);
-                       p = realloc(arr_u32, new_nr_u32 * sizeof(uint32_t));
-                       if (!p)
-                               return ret_errno(ENOMEM);
-                       arr_u32 = move_ptr(p);
-
-                       memset(arr_u32 + nr_u32, 0,
-                              (new_nr_u32 - nr_u32) * sizeof(uint32_t));
-                       nbits += add_bits;
-               }
-
-               while (first_bit <= last_bit)
-                       set_bit(first_bit++, arr_u32);
-
-               if (last_bit > cur_last_set_bit)
-                       cur_last_set_bit = last_bit;
-       }
-
-       *last_set_bit = cur_last_set_bit;
-       *bitarr = move_ptr(arr_u32);
-       return 0;
-}
-
-static int lxc_cpumask_update(char *buf, __u32 *bitarr, __u32 last_set_bit,
-                             bool clear)
-{
-       bool flipped = false;
-       char *token;
-
-       lxc_iterate_parts(token, buf, ",") {
-               __u32 last_bit, first_bit;
-               char *range;
-
-               errno = 0;
-               first_bit = strtoul(token, NULL, 0);
-               last_bit = first_bit;
-               range = strchr(token, '-');
-               if (range)
-                       last_bit = strtoul(range + 1, NULL, 0);
-
-               if (!(first_bit <= last_bit)) {
-                       lxcfs_debug("The cup range seems to be inverted: %u-%u", first_bit, last_bit);
-                       continue;
-               }
-
-               if (last_bit > last_set_bit)
-                       continue;
-
-               while (first_bit <= last_bit) {
-                       if (clear && is_set(first_bit, bitarr)) {
-                               flipped = true;
-                               clear_bit(first_bit, bitarr);
-                       } else if (!clear && !is_set(first_bit, bitarr)) {
-                               flipped = true;
-                               set_bit(first_bit, bitarr);
-                       }
-
-                       first_bit++;
-               }
-       }
-
-       if (flipped)
-               return 1;
-
-       return 0;
-}
-
-#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
-#define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
-static int cpumask(char *posscpus, __u32 **bitarr, __u32 *last_set_bit)
-{
-       __do_free char *isolcpus = NULL, *offlinecpus = NULL;
-       __do_free __u32 *possmask = NULL;
-       int ret;
-       __u32 poss_last_set_bit = 0;
-
-       if (file_exists(__ISOL_CPUS)) {
-               isolcpus = read_file_at(-EBADF, __ISOL_CPUS, PROTECT_OPEN);
-               if (!isolcpus)
-                       return -1;
-
-               if (!isdigit(isolcpus[0]))
-                       free_disarm(isolcpus);
-       } else {
-               lxcfs_debug("The path \""__ISOL_CPUS"\" to read isolated cpus from does not exist");
-       }
-
-       if (file_exists(__OFFLINE_CPUS)) {
-               offlinecpus = read_file_at(-EBADF, __OFFLINE_CPUS, PROTECT_OPEN);
-               if (!offlinecpus)
-                       return -1;
-
-               if (!isdigit(offlinecpus[0]))
-                       free_disarm(offlinecpus);
-       } else {
-               lxcfs_debug("The path \""__OFFLINE_CPUS"\" to read offline cpus from does not exist");
-       }
-
-       ret = lxc_cpumask(posscpus, &possmask, &poss_last_set_bit);
-       if (ret)
-               return ret;
-
-       if (isolcpus)
-               ret = lxc_cpumask_update(isolcpus, possmask, poss_last_set_bit, true);
-
-       if (offlinecpus)
-               ret |= lxc_cpumask_update(offlinecpus, possmask, poss_last_set_bit, true);
-       if (ret)
-               return ret;
-
-       *bitarr = move_ptr(possmask);
-       *last_set_bit = poss_last_set_bit;
-       return 0;
-}
-
 static int do_cpuset_read(char *cg, char *buf, size_t buflen)
 {
         __do_free char *cpuset = NULL;
@@ -299,61 +145,14 @@ static int sys_devices_system_cpu_online_getsize(const char *path)
 static int filler_sys_devices_system_cpu(const char *path, void *buf,
                                         fuse_fill_dir_t filler)
 {
-       __do_free __u32 *bitarr = NULL;
-       __do_free char *cg = NULL, *cpuset = NULL;
        __do_closedir DIR *dirp = NULL;
-       struct fuse_context *fc = fuse_get_context();
-       __u32 last_set_bit = 0;
-       int ret;
        struct dirent *dirent;
-       pid_t initpid;
-
-       initpid = lookup_initpid_in_store(fc->pid);
-       if (initpid <= 1 || is_shared_pidns(initpid))
-               initpid = fc->pid;
-
-       cg = get_pid_cgroup(initpid, "cpuset");
-       if (!cg)
-               return 0;
-       prune_init_slice(cg);
-
-       cpuset = get_cpuset(cg);
-       if (!cpuset)
-               return 0;
-
-       ret = cpumask(cpuset, &bitarr, &last_set_bit);
-       if (ret)
-               return ret;
 
        dirp = opendir(path);
        if (!dirp)
                return -ENOENT;
 
-       for (__u32 bit = 0; bit <= last_set_bit; bit++) {
-               char cpu[100];
-
-               if (!is_set(bit, bitarr))
-                       continue;
-
-               ret = snprintf(cpu, sizeof(cpu), "cpu%u", bit);
-               if (ret < 0 || (size_t)ret >= sizeof(cpu))
-                       continue;
-
-               if (dir_fillerat(filler, dirp, cpu, buf, 0) != 0)
-                       return -ENOENT;
-       }
-
        while ((dirent = readdir(dirp))) {
-               char *entry = dirent->d_name;
-
-               if (strlen(entry) > 3) {
-                       entry += 3;
-
-                       /* Don't emit entries we already filtered above. */
-                       if (isdigit(*entry))
-                               continue;
-               }
-
                if (dirent_fillerat(filler, dirp, dirent, buf, 0) != 0)
                        return -ENOENT;
        }
@@ -590,10 +389,10 @@ __lxcfs_fuse_ops int sys_readdir(const char *path, void *buf,
                        return -ENOENT;
                return 0;
        case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU:
-               if (dir_filler(filler, buf, ".", 0) != 0 ||
-                   dir_filler(filler, buf, "..", 0) != 0)
+               if (dir_filler(filler, buf, ".",        0) != 0 ||
+                   dir_filler(filler, buf, "..",       0) != 0 ||
+                   dirent_filler(filler, path, "online", buf,  0) != 0)
                        return -ENOENT;
-
                return filler_sys_devices_system_cpu(path, buf, filler);
        case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR:
                dirp = opendir_flags(path, O_CLOEXEC | O_NOFOLLOW);
index 20571ee1147b0ab7994f3431fbc94d6df7dab47f..656bb6a03378cd61c6f05bf391be2afc10d22afe 100755 (executable)
@@ -82,8 +82,6 @@ TESTCASE="Stress readdir"
 RUNTEST ${dirname}/test_readdir
 TESTCASE="test_proc"
 RUNTEST ${dirname}/test_proc
-TESTCASE="test_sysfs"
-RUNTEST ${dirname}/test_sysfs
 TESTCASE="test_cgroup"
 RUNTEST ${dirname}/test_cgroup
 TESTCASE="test_read_proc.sh"
index fb461d062e5dbc097b999199fe5bad18adeddfb9..ad8699c237dea45ece80e5016026df1974aa836c 100644 (file)
@@ -60,18 +60,6 @@ test_programs += custom_target(
         '@OUTPUT@',
     ])
 
-test_programs += custom_target(
-   'test_sysfs',
-    build_by_default: want_tests != false,
-    input: 'test_sysfs.in',
-    output: 'test_sysfs',
-    command: [
-        meson_render_jinja2,
-        config_h,
-        '@INPUT@',
-        '@OUTPUT@',
-    ])
-
 test_programs += custom_target(
     'test_read_proc.sh',
     build_by_default: want_tests != false,
diff --git a/tests/test_sysfs.in b/tests/test_sysfs.in
deleted file mode 100755 (executable)
index 936f520..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: LGPL-2.1+
-
-set -eu
-[ -n "${DEBUG:-}" ] && set -x
-
-PASS=0
-
-cleanup() {
-    [ "$PASS" = "1" ] || (echo FAIL && exit 1)
-}
-
-trap cleanup EXIT HUP INT TERM
-
-LXCFSDIR=${LXCFSDIR:-/var/lib/lxcfs}
-
-if ! mountpoint -q ${LXCFSDIR}; then
-    echo "lxcfs isn't mounted on ${LXCFSDIR}"
-    exit 1
-fi
-
-if [ "{{ HAVE_FUSE_RETURNS_DT_TYPE }}" != "1" ]; then
-    echo "FUSE3 version doesn't support what's needed for sysfs cpu"
-    PASS=1
-    exit 0
-fi
-
-echo "==> Setting up memory/cpuset cgroup in lxcfs_test_proc"
-[ ! -d /sys/devices/system/cpu ] && exit 0
-mount -o bind "${LXCFSDIR}/sys/devices/system/cpu" "/sys/devices/system/cpu"
-num_cpus="$(getconf _NPROCESSORS_CONF)"
-umount -l "/sys/devices/system/cpu"
-
-echo "Detected ${num_cpus} cpus"
-[ "${num_cpus}" != "0" ]
-
-PASS=1