From 96e84995750b460bcdf40feca07fe96d97a5f484 Mon Sep 17 00:00:00 2001
From: Thomas Walter <31201229+waltoss@users.noreply.github.com>
Date: Wed, 28 Feb 2024 12:23:23 +0100
Subject: [PATCH] linux: fix uv_available_parallelism using cgroup (#4278)

uv_available_parallelism does not handle container cpu limit
set by systems like Docker or Kubernetes. This patch fixes
this limitation by comparing the amount of available cpus
returned by syscall with the quota of cpus available defined
in the cgroup.

Backport notes (differences from the upstream commit):
- restore the "&quota" arguments that were garbled in the test hunk
- scan the cpu.max period with %lld to match its long long destination
- ignore non-positive quotas (cgroup v1 reports -1 when unlimited)
- parse the test's quota/period as signed longs and reject a zero period
- keep the existing "if (rc < 1)" lines as context instead of re-adding
  them with trailing whitespace
- document the new helpers and struct fields

Fixes: https://github.com/libuv/libuv/issues/4146
(cherry picked from commit 6b56200cc8e0d60d7662f0cb49eccbd23530a3e1)
---
 src/unix/core.c             |  12 +++
 src/unix/internal.h         |  10 +++
 src/unix/linux.c            | 142 ++++++++++++++++++++++++++++++++++++
 test/test-platform-output.c |  40 +++++++++++
 4 files changed, 204 insertions(+)

diff --git a/src/unix/core.c b/src/unix/core.c
index 352d50ebb9d..cfd4c7a2076 100644
--- a/src/unix/core.c
+++ b/src/unix/core.c
@@ -1869,6 +1869,8 @@ unsigned int uv_available_parallelism(void) {
 #ifdef __linux__
   cpu_set_t set;
   long rc;
+  double rc_with_cgroup;
+  uv__cpu_constraint c = {0, 0, 0.0};
 
   memset(&set, 0, sizeof(set));
 
@@ -1880,8 +1882,18 @@ unsigned int uv_available_parallelism(void) {
     rc = CPU_COUNT(&set);
   else
     rc = sysconf(_SC_NPROCESSORS_ONLN);
 
+  /* A non-positive quota (cgroup v1 reports -1) means "no limit". */
+  if (uv__get_constrained_cpu(&c) == 0 &&
+      c.period_length > 0 &&
+      c.quota_per_period > 0) {
+    rc_with_cgroup = (double) c.quota_per_period / c.period_length;
+    rc_with_cgroup *= c.proportions;
+    /* The cast is safe: rc_with_cgroup < rc <= LONG_MAX. */
+    if (rc_with_cgroup < rc)
+      rc = (long) rc_with_cgroup;
+  }
   if (rc < 1)
     rc = 1;
 
   return (unsigned) rc;
diff --git a/src/unix/internal.h b/src/unix/internal.h
index 1a3570d293b..7768c2d0fc3 100644
--- a/src/unix/internal.h
+++ b/src/unix/internal.h
@@ -461,4 +461,14 @@ uv__fs_copy_file_range(int fd_in,
 #define UV__CPU_AFFINITY_SUPPORTED 0
 #endif
 
+#ifdef __linux__
+typedef struct {
+  long long quota_per_period;  /* cpu.max quota or cpu.cfs_quota_us */
+  long long period_length;     /* cpu.max period or cpu.cfs_period_us */
+  double proportions;          /* cpu.weight / 100 or cpu.shares / 1024 */
+} uv__cpu_constraint;
+
+int uv__get_constrained_cpu(uv__cpu_constraint* constraint);
+#endif
+
 #endif /* UV_UNIX_INTERNAL_H_ */
diff --git a/src/unix/linux.c b/src/unix/linux.c
index 5690eb0b2f5..79fd84cf78d 100644
--- a/src/unix/linux.c
+++ b/src/unix/linux.c
@@ -2251,6 +2251,148 @@ uint64_t uv_get_available_memory(void) {
 }
 
 
+/* Fill `constraint` from cpu.max and cpu.weight of the cgroup v2 entry in
+ * `cgroup` (the contents of /proc/self/cgroup). Returns 0, UV_EINVAL when
+ * an entry or file cannot be parsed, or UV_EIO when a file cannot be read. */
+static int uv__get_cgroupv2_constrained_cpu(const char* cgroup,
+                                            uv__cpu_constraint* constraint) {
+  char path[256];
+  char buf[1024];
+  unsigned int weight;
+  int cgroup_size;
+  const char* cgroup_trimmed;
+  char quota_buf[16];
+
+  if (strncmp(cgroup, "0::/", 4) != 0)
+    return UV_EINVAL;
+
+  /* Skip the "0::/" prefix; the path runs up to the end of the first line. */
+  cgroup_trimmed = cgroup + sizeof("0::/") - 1;
+  cgroup_size = (int)strcspn(cgroup_trimmed, "\n");
+
+  /* Construct the path to the cpu.max file */
+  snprintf(path, sizeof(path), "/sys/fs/cgroup/%.*s/cpu.max", cgroup_size,
+           cgroup_trimmed);
+
+  /* Read cpu.max */
+  if (uv__slurp(path, buf, sizeof(buf)) < 0)
+    return UV_EIO;
+
+  if (sscanf(buf, "%15s %lld", quota_buf, &constraint->period_length) != 2)
+    return UV_EINVAL;
+
+  if (strncmp(quota_buf, "max", 3) == 0)
+    constraint->quota_per_period = LLONG_MAX;
+  else if (sscanf(quota_buf, "%lld", &constraint->quota_per_period) != 1)
+    return UV_EINVAL; /* conversion failed */
+
+  /* Construct the path to the cpu.weight file */
+  snprintf(path, sizeof(path), "/sys/fs/cgroup/%.*s/cpu.weight", cgroup_size,
+           cgroup_trimmed);
+
+  /* Read cpu.weight */
+  if (uv__slurp(path, buf, sizeof(buf)) < 0)
+    return UV_EIO;
+
+  if (sscanf(buf, "%u", &weight) != 1)
+    return UV_EINVAL;
+
+  constraint->proportions = (double)weight / 100.0;
+
+  return 0;
+}
+
+/* Return a pointer just past the first ":cpu," in `cgroup` and store the
+ * length of the rest of that line in `*cgroup_size`. Returns NULL, leaving
+ * `*cgroup_size` untouched, when ":cpu," does not occur. */
+static char* uv__cgroup1_find_cpu_controller(const char* cgroup,
+                                             int* cgroup_size) {
+  /* Seek to the cpu controller line. */
+  char* cgroup_cpu = strstr(cgroup, ":cpu,");
+
+  if (cgroup_cpu != NULL) {
+    /* Skip the controller prefix to the start of the cgroup path. */
+    cgroup_cpu += sizeof(":cpu,") - 1;
+    /* Determine the length of the cgroup path, excluding the newline. */
+    *cgroup_size = (int)strcspn(cgroup_cpu, "\n");
+  }
+
+  return cgroup_cpu;
+}
+
+/* Fill `constraint` from cpu.cfs_quota_us, cpu.cfs_period_us and cpu.shares
+ * of the cgroup v1 cpu controller entry in `cgroup`. Returns 0, UV_EINVAL
+ * when a file cannot be parsed, or UV_EIO when lookup or reading fails. */
+static int uv__get_cgroupv1_constrained_cpu(const char* cgroup,
+                                            uv__cpu_constraint* constraint) {
+  char path[256];
+  char buf[1024];
+  unsigned int shares;
+  int cgroup_size;
+  char* cgroup_cpu;
+
+  cgroup_cpu = uv__cgroup1_find_cpu_controller(cgroup, &cgroup_size);
+
+  if (cgroup_cpu == NULL)
+    return UV_EIO;
+
+  /* Construct the path to the cpu.cfs_quota_us file */
+  snprintf(path, sizeof(path), "/sys/fs/cgroup/%.*s/cpu.cfs_quota_us",
+           cgroup_size, cgroup_cpu);
+
+  if (uv__slurp(path, buf, sizeof(buf)) < 0)
+    return UV_EIO;
+
+  if (sscanf(buf, "%lld", &constraint->quota_per_period) != 1)
+    return UV_EINVAL;
+
+  /* Construct the path to the cpu.cfs_period_us file */
+  snprintf(path, sizeof(path), "/sys/fs/cgroup/%.*s/cpu.cfs_period_us",
+           cgroup_size, cgroup_cpu);
+
+  /* Read cpu.cfs_period_us */
+  if (uv__slurp(path, buf, sizeof(buf)) < 0)
+    return UV_EIO;
+
+  if (sscanf(buf, "%lld", &constraint->period_length) != 1)
+    return UV_EINVAL;
+
+  /* Construct the path to the cpu.shares file */
+  snprintf(path, sizeof(path), "/sys/fs/cgroup/%.*s/cpu.shares", cgroup_size,
+           cgroup_cpu);
+
+  /* Read cpu.shares */
+  if (uv__slurp(path, buf, sizeof(buf)) < 0)
+    return UV_EIO;
+
+  if (sscanf(buf, "%u", &shares) != 1)
+    return UV_EINVAL;
+
+  constraint->proportions = (double)shares / 1024.0;
+
+  return 0;
+}
+
+/* Read /proc/self/cgroup and fill `constraint` through the cgroup v2 reader
+ * when the data starts with "0::/", otherwise through the cgroup v1 reader.
+ * Returns 0 on success, else UV_EIO or UV_EINVAL. */
+int uv__get_constrained_cpu(uv__cpu_constraint* constraint) {
+  char cgroup[1024];
+
+  /* Read the cgroup from /proc/self/cgroup */
+  if (uv__slurp("/proc/self/cgroup", cgroup, sizeof(cgroup)) < 0)
+    return UV_EIO;
+
+  /* Check if the system is using cgroup v2 by examining /proc/self/cgroup
+   * The entry for cgroup v2 is always in the format "0::$PATH"
+   * see https://docs.kernel.org/admin-guide/cgroup-v2.html */
+  if (strncmp(cgroup, "0::/", 4) == 0)
+    return uv__get_cgroupv2_constrained_cpu(cgroup, constraint);
+  else
+    return uv__get_cgroupv1_constrained_cpu(cgroup, constraint);
+}
+
+
 void uv_loadavg(double avg[3]) {
   struct sysinfo info;
   char buf[128];  /* Large enough to hold all of /proc/loadavg. */
diff --git a/test/test-platform-output.c b/test/test-platform-output.c
index 7fc5775e6f1..235dcb28d5e 100644
--- a/test/test-platform-output.c
+++ b/test/test-platform-output.c
@@ -90,6 +90,46 @@ TEST_IMPL(platform_output) {
   ASSERT_GE(par, 1);
   printf("uv_available_parallelism: %u\n", par);
 
+#ifdef __linux__
+  FILE* file;
+  int cgroup_version = 0;
+  unsigned int cgroup_par = 0;
+  long quota, period;
+
+  // Attempt to parse cgroup v2 to deduce parallelism constraints
+  file = fopen("/sys/fs/cgroup/cpu.max", "r");
+  if (file) {
+    if (fscanf(file, "%ld %ld", &quota, &period) == 2 &&
+        quota > 0 && period > 0) {
+      cgroup_version = 2;
+      cgroup_par = (unsigned int)(quota / period);
+    }
+    fclose(file);
+  }
+
+  // If cgroup v2 wasn't present, try parsing cgroup v1
+  if (cgroup_version == 0) {
+    file = fopen("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us", "r");
+    if (file) {
+      if (fscanf(file, "%ld", &quota) == 1 && quota > 0) {
+        fclose(file);
+        file = fopen("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us", "r");
+        if (file && fscanf(file, "%ld", &period) == 1 && period > 0) {
+          cgroup_version = 1;
+          cgroup_par = (unsigned int)(quota / period);
+        }
+      }
+      if (file) fclose(file);
+    }
+  }
+
+  // If we found cgroup parallelism constraints, assert and print them
+  if (cgroup_par > 0) {
+    ASSERT_GE(par, cgroup_par);
+    printf("cgroup v%d available parallelism: %u\n", cgroup_version, cgroup_par);
+  }
+#endif
+
   err = uv_cpu_info(&cpus, &count);
 #if defined(__CYGWIN__) || defined(__MSYS__)
   ASSERT_EQ(err, UV_ENOSYS);