linux: workaround to avoid deadlock inside dl_iterate_phdr in glibc
Extend the fix for #43578 on Darwin to also cover the same bug in Glibc
(and just assume other libc implementations have the same bug). We cannot
use the same atfork trick, since Glibc's atfork handling of this lock makes
it unsafe to use after fork, just like on Darwin (though due to different
basic concurrency mistakes in each of their respective implementations).

Fix #57017
vtjnash committed Jan 13, 2025
1 parent 3b629f1 commit d36f24a
Showing 5 changed files with 130 additions and 106 deletions.
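
The essence of the workaround is a lock-ordering change: instead of exposing a lock/unlock pair that the sampler might acquire while another thread sits suspended inside the dynamic loader, the critical section now runs as a callback from dl_iterate_phdr, so glibc's internal loader lock is always taken before the profiler lock. A minimal standalone sketch of that pattern (not the Julia code itself; the mutex and helper names here are hypothetical):

#include <link.h>      // dl_iterate_phdr, struct dl_phdr_info (glibc)
#include <pthread.h>
#include <stddef.h>

// Hypothetical stand-in for Julia's profiler lock.
static pthread_mutex_t profile_lock = PTHREAD_MUTEX_INITIALIZER;

typedef struct {
    void (*f)(void *);
    void *ctx;
} callback_t;

// Runs while the loader already holds its own lock, so the profiler lock
// is always acquired after the loader lock, never the other way around.
static int run_with_loader_lock(struct dl_phdr_info *info, size_t size, void *data)
{
    (void)info; (void)size;
    callback_t *cb = (callback_t *)data;
    pthread_mutex_lock(&profile_lock);
    cb->f(cb->ctx);
    pthread_mutex_unlock(&profile_lock);
    return 1; // non-zero stops iteration after the first shared object
}

static void with_stackwalk_lock(void (*f)(void *), void *ctx)
{
    callback_t cb = {f, ctx};
    dl_iterate_phdr(run_with_loader_lock, &cb);
}
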
3 changes: 1 addition & 2 deletions src/julia_internal.h
@@ -208,8 +208,7 @@ JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
int jl_lock_stackwalk(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
void jl_unlock_stackwalk(int lockret) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
void jl_with_stackwalk_lock(void (*f)(void*) JL_NOTSAFEPOINT, void *ctx) JL_NOTSAFEPOINT;

arraylist_t *jl_get_all_tasks_arraylist(void) JL_NOTSAFEPOINT;
typedef struct {
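For callers, the header change above replaces the paired lock/unlock API with a callback-taking wrapper. A hypothetical call site (assuming julia_internal.h is included; collect_samples and sample_all_threads are illustrative names, not functions from the tree) might migrate like this:

// Before: bracket the critical section with explicit lock/unlock calls.
//   int lockret = jl_lock_stackwalk();
//   ... suspend threads and record backtraces ...
//   jl_unlock_stackwalk(lockret);

// After: the critical section becomes a callback run under the lock.
static void collect_samples(void *ctx) JL_NOTSAFEPOINT
{
    (void)ctx;
    // ... suspend threads and record backtraces ...
}

static void sample_all_threads(void)
{
    jl_with_stackwalk_lock(collect_samples, NULL);
}
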
9 changes: 3 additions & 6 deletions src/signals-mach.c
@@ -714,13 +714,10 @@ static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked)
jl_unlock_profile();
}

int jl_lock_stackwalk(void)
{
return jl_lock_profile_mach(1);
}

void jl_unlock_stackwalk(int lockret)
void jl_with_stackwalk_lock(void (*f)(void*), void *ctx)
{
int lockret = jl_lock_profile_mach(1);
f(ctx);
jl_unlock_profile_mach(1, lockret);
}

186 changes: 100 additions & 86 deletions src/signals-unix.c
@@ -308,20 +308,27 @@ int exc_reg_is_write_fault(uintptr_t esr) {
#else
#include <poll.h>
#include <sys/eventfd.h>
#include <link.h>

int jl_lock_stackwalk(void)
typedef struct {
void (*f)(void*) JL_NOTSAFEPOINT;
void *ctx;
} callback_t;
static int with_dl_iterate_phdr_lock(struct dl_phdr_info *info, size_t size, void *data)
{
jl_lock_profile();
return 0;
callback_t *callback = (callback_t*)data;
callback->f(callback->ctx);
jl_unlock_profile();
return 1; // only call this once
}

void jl_unlock_stackwalk(int lockret)
void jl_with_stackwalk_lock(void (*f)(void*), void *ctx)
{
(void)lockret;
jl_unlock_profile();
callback_t callback = {f, ctx};
dl_iterate_phdr(with_dl_iterate_phdr_lock, &callback);
}


#if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_))
int is_write_fault(void *context) {
ucontext_t *ctx = (ucontext_t*)context;
Expand Down Expand Up @@ -435,7 +442,7 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
}

pthread_mutex_t in_signal_lock; // shared with jl_delete_thread
static bt_context_t *signal_context; // protected by in_signal_lock
static bt_context_t *usr2_signal_context; // protected by in_signal_lock
static int exit_signal_cond = -1;
static int signal_caught_cond = -1;
static int signals_inflight = 0;
@@ -507,7 +514,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
request = jl_atomic_load_acquire(&ptls2->signal_request);
assert(request == 0 || request == -1); (void) request;
jl_atomic_store_release(&ptls2->signal_request, 4); // prepare to resume normally, but later code may change this
*ctx = *signal_context;
*ctx = *usr2_signal_context;
return 1;
}

@@ -587,8 +594,8 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
if (!jl_atomic_cmpswap(&ptls->signal_request, &request, -1))
return;
if (request == 1) {
signal_context = jl_to_bt_context(ctx);
// acknowledge that we saw the signal_request and set signal_context
usr2_signal_context = jl_to_bt_context(ctx);
// acknowledge that we saw the signal_request and set usr2_signal_context
int err;
eventfd_t got = 1;
err = write(signal_caught_cond, &got, sizeof(eventfd_t));
@@ -602,7 +609,7 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
if (err != sizeof(eventfd_t)) abort();
assert(got == 1);
request = jl_atomic_exchange(&ptls->signal_request, -1);
signal_context = NULL;
usr2_signal_context = NULL;
assert(request == 2 || request == 3 || request == 4);
}
int err;
@@ -806,7 +813,7 @@ void trigger_profile_peek(void)
jl_safe_printf("\n======================================================================================\n");
jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration);
jl_safe_printf("======================================================================================\n");
if (profile_bt_size_max == 0){
if (profile_bt_size_max == 0) {
// If the buffer hasn't been initialized, initialize with default size
// Keep these values synchronized with Profile.default_init()
if (jl_profile_init(10000000, 1000000) == -1) {
@@ -821,59 +828,93 @@ void trigger_profile_peek(void)
profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9);
}

// assumes holding `jl_lock_stackwalk`
void jl_profile_thread_unix(int tid, bt_context_t *signal_context)
#if !defined(JL_DISABLE_LIBUNWIND)

static jl_bt_element_t signal_bt_data[JL_MAX_BT_SIZE + 1];
static size_t signal_bt_size = 0;
static void do_critical_profile(void *ctx)
{
if (jl_profile_is_buffer_full()) {
// Buffer full: Delete the timer
jl_profile_stop_timer();
return;
}
// notify thread to stop
if (!jl_thread_suspend_and_get_state(tid, 1, signal_context))
return;
// unwinding can fail, so keep track of the current state
// and restore from the SEGV handler if anything happens.
jl_jmp_buf *old_buf = jl_get_safe_restore();
jl_jmp_buf buf;

jl_set_safe_restore(&buf);
if (jl_setjmp(buf, 0)) {
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
} else {
// Get backtrace data
profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur,
profile_bt_size_max - profile_bt_size_cur - 1, signal_context, NULL);
bt_context_t signal_context;
// sample each thread, round-robin style in reverse order
// (so that thread zero gets notified last)
int nthreads = jl_atomic_load_acquire(&jl_n_threads);
for (int i = nthreads; i-- > 0; ) {
// notify thread to stop
if (!jl_thread_suspend_and_get_state(i, 1, &signal_context))
continue;

// do backtrace on thread contexts for critical signals
// this part must be signal-handler safe
signal_bt_size += rec_backtrace_ctx(signal_bt_data + signal_bt_size,
JL_MAX_BT_SIZE / nthreads - 1,
&signal_context, NULL);
signal_bt_data[signal_bt_size++].uintptr = 0;
jl_thread_resume(i);
}
jl_set_safe_restore(old_buf);
}

jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
static void do_profile(void *ctx)
{
bt_context_t signal_context;
int nthreads = jl_atomic_load_acquire(&jl_n_threads);
int *randperm = profile_get_randperm(nthreads);
for (int idx = nthreads; idx-- > 0; ) {
// Stop the threads in the random order.
int tid = randperm[idx];
// do backtrace for profiler
if (!profile_running)
return;
if (jl_profile_is_buffer_full()) {
// Buffer full: Delete the timer
jl_profile_stop_timer();
return;
}
// notify thread to stop
if (!jl_thread_suspend_and_get_state(tid, 1, &signal_context))
return;
// unwinding can fail, so keep track of the current state
// and restore from the SEGV handler if anything happens.
jl_jmp_buf *old_buf = jl_get_safe_restore();
jl_jmp_buf buf;

jl_set_safe_restore(&buf);
if (jl_setjmp(buf, 0)) {
jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
}
else {
// Get backtrace data
profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur,
profile_bt_size_max - profile_bt_size_cur - 1, &signal_context, NULL);
}
jl_set_safe_restore(old_buf);

// store threadid but add 1 as 0 is preserved to indicate end of block
profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls2->tid + 1;
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];

// store task id (never null)
profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);
// store threadid but add 1 as 0 is preserved to indicate end of block
profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls2->tid + 1;

// store cpu cycle clock
profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock();
// store task id (never null)
profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);

// store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block)
int state = jl_atomic_load_relaxed(&ptls2->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING;
profile_bt_data_prof[profile_bt_size_cur++].uintptr = state;
// store cpu cycle clock
profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock();

// Mark the end of this block with two 0's
profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
// store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block)
int state = jl_atomic_load_relaxed(&ptls2->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING;
profile_bt_data_prof[profile_bt_size_cur++].uintptr = state;

// notify thread to resume
jl_thread_resume(tid);
// Mark the end of this block with two 0's
profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;

// notify thread to resume
jl_thread_resume(tid);
}
}
#endif

static void *signal_listener(void *arg)
{
static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1];
static size_t bt_size = 0;
sigset_t sset;
int sig, critical, profile;
jl_sigsetset(&sset);
@@ -1005,46 +1046,18 @@ static void *signal_listener(void *arg)
}
}

int nthreads = jl_atomic_load_acquire(&jl_n_threads);
bt_size = 0;
signal_bt_size = 0;
#if !defined(JL_DISABLE_LIBUNWIND)
bt_context_t signal_context;
if (critical) {
int lockret = jl_lock_stackwalk();
// sample each thread, round-robin style in reverse order
// (so that thread zero gets notified last)
for (int i = nthreads; i-- > 0; ) {
// notify thread to stop
if (!jl_thread_suspend_and_get_state(i, 1, &signal_context))
continue;

// do backtrace on thread contexts for critical signals
// this part must be signal-handler safe
bt_size += rec_backtrace_ctx(bt_data + bt_size,
JL_MAX_BT_SIZE / nthreads - 1,
&signal_context, NULL);
bt_data[bt_size++].uintptr = 0;
jl_thread_resume(i);
}
jl_unlock_stackwalk(lockret);
jl_with_stackwalk_lock(do_critical_profile, NULL);
}
else if (profile) {
if (profile_all_tasks) {
// Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace`
jl_profile_task();
}
else {
int lockret = jl_lock_stackwalk();
int *randperm = profile_get_randperm(nthreads);
for (int idx = nthreads; idx-- > 0; ) {
// Stop the threads in the random order.
int i = randperm[idx];
// do backtrace for profiler
if (profile_running) {
jl_profile_thread_unix(i, &signal_context);
}
}
jl_unlock_stackwalk(lockret);
jl_with_stackwalk_lock(do_profile, NULL);
}
}
#ifndef HAVE_MACH
@@ -1065,11 +1078,12 @@ static void *signal_listener(void *arg)
//#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L && !HAVE_KEVENT
// si_code = info.si_code;
//#endif
jl_exit_thread0(sig, bt_data, bt_size);
jl_exit_thread0(sig, signal_bt_data, signal_bt_size);
}
else if (critical) {
// critical in this case actually means SIGINFO request
#ifndef SIGINFO // SIGINFO already prints something similar automatically
int nthreads = jl_atomic_load_acquire(&jl_n_threads);
int n_threads_running = 0;
for (int idx = nthreads; idx-- > 0; ) {
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx];
@@ -1080,8 +1094,8 @@ static void *signal_listener(void *arg)

jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
size_t i;
for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
jl_print_bt_entry_codeloc(bt_data + i);
for (i = 0; i < signal_bt_size; i += jl_bt_entry_size(signal_bt_data + i)) {
jl_print_bt_entry_codeloc(signal_bt_data + i);
}
}
}
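Taken together, the signals-unix.c changes split the old inline sampling loops out into do_critical_profile and do_profile and run them through the new wrapper. Condensed, the listener's dispatch now amounts to the following sketch (error handling and surrounding code elided):

signal_bt_size = 0;
#if !defined(JL_DISABLE_LIBUNWIND)
if (critical) {
    // backtrace every thread, round-robin, for fatal signals / SIGINFO
    jl_with_stackwalk_lock(do_critical_profile, NULL);
}
else if (profile) {
    if (profile_all_tasks)
        jl_profile_task();   // stackwalk lock is taken inside jl_rec_backtrace
    else
        // sample threads in random order for the statistical profiler
        jl_with_stackwalk_lock(do_profile, NULL);
}
#endif
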
20 changes: 12 additions & 8 deletions src/signals-win.c
@@ -383,20 +383,24 @@ void jl_thread_resume(int tid)
}
}

int jl_lock_stackwalk(void)
void jl_lock_stackwalk(void)
{
uv_mutex_lock(&jl_in_stackwalk);
jl_lock_profile();
return 0;
}

void jl_unlock_stackwalk(int lockret)
{
(void)lockret;
void jl_unlock_stackwalk(void)
{
jl_unlock_profile();
uv_mutex_unlock(&jl_in_stackwalk);
}

void jl_with_stackwalk_lock(void (*f)(void*), void *ctx)
{
jl_lock_stackwalk();
f(ctx);
jl_unlock_stackwalk();
}


static DWORD WINAPI profile_bt( LPVOID lparam )
{
@@ -416,10 +420,10 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
}
else {
// TODO: bring this up to parity with other OS by adding loop over tid here
int lockret = jl_lock_stackwalk();
jl_lock_stackwalk();
CONTEXT ctxThread;
if (!jl_thread_suspend_and_get_state(0, 0, &ctxThread)) {
jl_unlock_stackwalk(lockret);
jl_unlock_stackwalk();
fputs("failed to suspend main thread. aborting profiling.", stderr);
jl_profile_stop_timer();
break;
@@ -446,7 +450,7 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
// Mark the end of this block with two 0's
profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
jl_unlock_stackwalk(lockret);
jl_unlock_stackwalk();
jl_thread_resume(0);
jl_check_profile_autostop();
}
