Skip to content

Commit

Permalink
Merge branch 'master' into jn/56628
Browse files Browse the repository at this point in the history
  • Loading branch information
inkydragon authored Jan 18, 2025
2 parents d2b19a9 + 1740287 commit 39ad8cb
Show file tree
Hide file tree
Showing 21 changed files with 255 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1402,7 +1402,7 @@ CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.34|GLIBCXX_3\.5\.|GLIBCXX_4\.
# Note: we explicitly _do not_ define `CSL` here, since it requires some more
# advanced techniques to decide whether it should be installed from a BB source
# or not. See `deps/csl.mk` for more detail.
BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP OPENSSL LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT BOLT
BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP OPENSSL LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT BOLT MMTK_JULIA
define SET_BB_DEFAULT
# First, check to see if BB is disabled on a global setting
ifeq ($$(USE_BINARYBUILDER),0)
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ endif
endif
endif

ifneq (${MMTK_PLAN},None)
JL_PRIVATE_LIBS-0 += libmmtk_julia
endif

# Note that we disable MSYS2's path munging here, as otherwise
# it replaces our `:`-separated list as a `;`-separated one.
define stringreplace
Expand Down
90 changes: 87 additions & 3 deletions base/condition.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,20 +125,104 @@ proceeding.
"""
function wait end

# wait with timeout
#
# The behavior of wait changes if a timeout is specified. There are
# three concurrent entities that can interact:
# 1. Task W: the task that calls wait w/timeout.
# 2. Task T: the task created to handle a timeout.
# 3. Task N: the task that notifies the Condition being waited on.
#
# Typical flow:
# - W enters the Condition's wait queue.
# - W creates T and stops running (calls wait()).
# - T, when scheduled, waits on a Timer.
# - Two common outcomes:
# - N notifies the Condition.
# - W starts running, closes the Timer, sets waiter_left and returns
# the notify'ed value.
# - The closed Timer throws an EOFError to T which simply ends.
# - The Timer expires.
# - T starts running and locks the Condition.
# - T confirms that waiter_left is unset and that W is still in the
# Condition's wait queue; it then removes W from the wait queue,
# sets dosched to true and unlocks the Condition.
# - If dosched is true, T schedules W with the special :timed_out
# value.
# - T ends.
# - W runs and returns :timed_out.
#
# Some possible interleavings:
# - N notifies the Condition but the Timer expires and T starts running
# before W:
# - W closing the expired Timer is benign.
# - T will find that W is no longer in the Condition's wait queue
# (which is protected by a lock) and will not schedule W.
# - N notifies the Condition; W runs and calls wait on the Condition
# again before the Timer expires:
# - W sets waiter_left before leaving. When T runs, it will find that
# waiter_left is set and will not schedule W.
#
# The lock on the Condition's wait queue and waiter_left together
# ensure proper synchronization and behavior of the tasks involved.

"""
wait(c::GenericCondition; first::Bool=false)
wait(c::GenericCondition; first::Bool=false, timeout::Real=0.0)
Wait for [`notify`](@ref) on `c` and return the `val` parameter passed to `notify`.
If the keyword `first` is set to `true`, the waiter will be put _first_
in line to wake up on `notify`. Otherwise, `wait` has first-in-first-out (FIFO) behavior.
If `timeout` is specified, cancel the `wait` when it expires and return
`:timed_out`. The minimum value for `timeout` is 0.001 seconds, i.e. 1
millisecond.
"""
function wait(c::GenericCondition; first::Bool=false)
function wait(c::GenericCondition; first::Bool=false, timeout::Real=0.0)
timeout == 0.0 || timeout ≥ 1e-3 || throw(ArgumentError("timeout must be ≥ 1 millisecond"))

ct = current_task()
_wait2(c, ct, first)
token = unlockall(c.lock)

timer::Union{Timer, Nothing} = nothing
waiter_left::Union{Threads.Atomic{Bool}, Nothing} = nothing
if timeout > 0.0
timer = Timer(timeout)
waiter_left = Threads.Atomic{Bool}(false)
# start a task to wait on the timer
t = Task() do
try
wait(timer)
catch e
# if the timer was closed, the waiting task has been scheduled; do nothing
e isa EOFError && return
end
dosched = false
lock(c.lock)
# Confirm that the waiting task is still in the wait queue and remove it. If
# the task is not in the wait queue, it must have been notified already so we
# don't do anything here.
if !waiter_left[] && ct.queue == c.waitq
dosched = true
Base.list_deletefirst!(c.waitq, ct)
end
unlock(c.lock)
# send the waiting task a timeout
dosched && schedule(ct, :timed_out)
end
t.sticky = false
Threads._spawn_set_thrpool(t, :interactive)
schedule(t)
end

try
return wait()
res = wait()
if timer !== nothing
close(timer)
waiter_left[] = true
end
return res
catch
q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
rethrow()
Expand Down
14 changes: 13 additions & 1 deletion base/logging/logging.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ function logmsg_code(_module, file, line, level, message, exs...)
end
line = $(log_data._line)
local msg, kwargs
$(logrecord) && invokelatest($handle_message,
$(logrecord) && $handle_message_nothrow(
logger, level, msg, _module, group, id, file, line;
kwargs...)
end
Expand All @@ -420,6 +420,18 @@ function logmsg_code(_module, file, line, level, message, exs...)
end
end

# Deliver one log record to `logger` by calling `handle_message`, catching any
# exception it raises and handing that exception to `logging_error` together
# with the record's metadata. The macro-generated logging code calls this
# helper instead of invoking `handle_message` directly.
#
# All positional arguments and `kwargs` are forwarded unchanged to
# `handle_message`. Note that `msg` and `kwargs` are not passed on to
# `logging_error`.
# NOTE(review): the "nothrow" in the name holds only if `logging_error` itself
# does not throw — confirm against its definition.
@noinline function handle_message_nothrow(logger, level, msg, _module, group, id, file, line; kwargs...)
# Applies to every argument: avoid compiling a specialization per argument type.
@nospecialize
try
# Both calls go through `@invokelatest`.
@invokelatest handle_message(
logger, level, msg, _module, group, id, file, line;
kwargs...)

catch err
# NOTE(review): the meaning of the trailing `true` flag is defined by
# `logging_error`, which is not visible here — verify before changing it.
@invokelatest logging_error(logger, level, _module, group, id, file, line, err, true)
end
end

function process_logmsg_exs(_orig_module, _file, _line, level, message, exs...)
@nospecialize
local _group, _id
Expand Down
1 change: 1 addition & 0 deletions base/options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct JLOptions
trace_compile_timing::Int8
trim::Int8
task_metrics::Int8
timeout_for_safepoint_straggler_s::Int16
end

# This runs early in the sysimage != is not defined yet
Expand Down
6 changes: 6 additions & 0 deletions deps/checksums/mmtk_julia
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/md5/1911cf084d26c48e2ed58af3d268b4b6
mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/sha512/75beab54398989c46b62e714b242cf6705d88d220f40c21e494e0f29161437f5fbe9ba05b543d2353a1ad76f4239ac4025b476be0be864649f310f14935289fe
mmtk_julia-f07d66aafc86af84ea988b35335acc9bbc770fa1.tar.gz/md5/38afb5db6d8c55413a4ec96aefa2ebb4
mmtk_julia-f07d66aafc86af84ea988b35335acc9bbc770fa1.tar.gz/sha512/78525582a46a6baf8d33df7b622e55cf244439afcd7192ba55489c1bc18393d1237d2903d517c610484bf9e2a7338ad31435a9cbf70889d6bcf87c40cec829e5
mmtk_julia.v0.30.3+1.x86_64-linux-gnu.tar.gz/md5/631b204574da7062802dac501a4b711f
mmtk_julia.v0.30.3+1.x86_64-linux-gnu.tar.gz/sha512/daaed59d08fc49621479ed638dea0aac0cba123986e486571447e8e21e9a098776ce2e87fbd92ddea276782fc44621f23d40fa213296b28e1d4480553c7de4f7

This file was deleted.

This file was deleted.

8 changes: 8 additions & 0 deletions deps/mmtk_julia.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
MMTK_MOVING := 0
MMTK_VARS := MMTK_PLAN=$(MMTK_PLAN) MMTK_MOVING=$(MMTK_MOVING)

ifneq ($(USE_BINARYBUILDER_MMTK_JULIA),1)
$(eval $(call git-external,mmtk_julia,MMTK_JULIA,,,$(BUILDDIR)))
get-mmtk_julia: $(MMTK_JULIA_SRC_FILE)

Expand Down Expand Up @@ -70,3 +71,10 @@ $(build_prefix)/manifest/mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so
@echo $(UNINSTALL_mmtk_julia) > $@

endif # MMTK_JULIA_DIR

else
# We are building using the BinaryBuilder version of the binding

$(eval $(call bb-install,mmtk_julia,MMTK_JULIA,false))

endif # USE_BINARYBUILDER_MMTK_JULIA
6 changes: 4 additions & 2 deletions deps/mmtk_julia.version
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
MMTK_JULIA_BRANCH = master
MMTK_JULIA_SHA1 = b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214
MMTK_JULIA_SHA1 = f07d66aafc86af84ea988b35335acc9bbc770fa1
MMTK_JULIA_GIT_URL := https://github.com/mmtk/mmtk-julia.git
MMTK_JULIA_TAR_URL = https://github.com/mmtk/mmtk-julia/archive/refs/tags/v0.30.2.tar.gz
MMTK_JULIA_TAR_URL = https://github.com/mmtk/mmtk-julia/archive/refs/tags/v0.30.3.tar.gz
MMTK_JULIA_JLL_VER := 0.30.3+1
MMTK_JULIA_JLL_NAME := mmtk_julia
3 changes: 3 additions & 0 deletions src/gc-mmtk.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,9 @@ JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure)
add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_list);
add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_keyset);

// FIXME: transitively pinning for now, should be removed after we add moving Immix
add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, precompile_field_replace);

// Push the result of the work.
(closure->report_nodes_func)(buf.ptr, len, buf.cap, closure->data, false);
(closure->report_tpinned_nodes_func)(tpinned_buf.ptr, tpinned_len, tpinned_buf.cap, closure->data, false);
Expand Down
12 changes: 12 additions & 0 deletions src/jloptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ JL_DLLEXPORT void jl_init_options(void)
0, // trace_compile_timing
JL_TRIM_NO, // trim
0, // task_metrics
-1, // timeout_for_safepoint_straggler_s
};
jl_options_initialized = 1;
}
Expand Down Expand Up @@ -311,6 +312,8 @@ static const char opts_hidden[] =
" --output-asm <name> Generate an assembly file (.s)\n"
" --output-incremental={yes|no*} Generate an incremental output file (rather than\n"
" complete)\n"
" --timeout-for-safepoint-straggler <seconds> If this value is set, then we will dump the backtrace for a thread\n"
" that fails to reach a safepoint within the specified time\n"
" --trace-compile={stderr|name} Print precompile statements for methods compiled\n"
" during execution or save to stderr or a path. Methods that\n"
" were recompiled are printed in yellow or with a trailing\n"
Expand Down Expand Up @@ -346,6 +349,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_warn_scope,
opt_inline,
opt_polly,
opt_timeout_for_safepoint_straggler,
opt_trace_compile,
opt_trace_compile_timing,
opt_trace_dispatch,
Expand Down Expand Up @@ -427,6 +431,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "warn-scope", required_argument, 0, opt_warn_scope },
{ "inline", required_argument, 0, opt_inline },
{ "polly", required_argument, 0, opt_polly },
{ "timeout-for-safepoint-straggler", required_argument, 0, opt_timeout_for_safepoint_straggler },
{ "trace-compile", required_argument, 0, opt_trace_compile },
{ "trace-compile-timing", no_argument, 0, opt_trace_compile_timing },
{ "trace-dispatch", required_argument, 0, opt_trace_dispatch },
Expand Down Expand Up @@ -970,6 +975,13 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
else
jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg);
break;
case opt_timeout_for_safepoint_straggler:
errno = 0;
long timeout = strtol(optarg, &endptr, 10);
if (errno != 0 || optarg == endptr || timeout < 1 || timeout > INT16_MAX)
jl_errorf("julia: --timeout-for-safepoint-straggler=<seconds>; seconds must be an integer between 1 and %d", INT16_MAX);
jl_options.timeout_for_safepoint_straggler_s = (int16_t)timeout;
break;
case opt_trim:
if (optarg == NULL || !strcmp(optarg,"safe"))
jl_options.trim = JL_TRIM_SAFE;
Expand Down
1 change: 1 addition & 0 deletions src/jloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ typedef struct {
int8_t trace_compile_timing;
int8_t trim;
int8_t task_metrics;
int16_t timeout_for_safepoint_straggler_s;
} jl_options_t;

#endif
2 changes: 2 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ typedef struct {
size_t bt_size;
int tid;
} jl_record_backtrace_result_t;
JL_DLLEXPORT JL_DLLEXPORT size_t jl_try_record_thread_backtrace(jl_ptls_t ptls2, struct _jl_bt_element_t *bt_data,
size_t max_bt_size) JL_NOTSAFEPOINT;
JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, struct _jl_bt_element_t *bt_data,
size_t max_bt_size, int all_tasks_profiler) JL_NOTSAFEPOINT;
extern volatile struct _jl_bt_element_t *profile_bt_data_prof;
Expand Down
31 changes: 27 additions & 4 deletions src/safepoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,33 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
// Use system mutexes rather than spin locking to minimize wasted CPU time
// while we wait for other threads reach a safepoint.
// This is particularly important when run under rr.
uv_mutex_lock(&safepoint_lock);
if (!jl_atomic_load_relaxed(&ptls2->gc_state))
uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
uv_mutex_unlock(&safepoint_lock);
if (jl_options.timeout_for_safepoint_straggler_s == -1) { // timeout was not specified: no need to dump the backtrace
uv_mutex_lock(&safepoint_lock);
if (!jl_atomic_load_relaxed(&ptls2->gc_state)) {
uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
}
uv_mutex_unlock(&safepoint_lock);
}
else {
// Convert the configured timeout from seconds to nanoseconds. The option is an
// int16_t, which would be promoted only to `int`; multiplying that by the `int`
// literal 1000000000 overflows a 32-bit int for any timeout of 3 seconds or
// more. Widen to int64_t before multiplying so the product is computed in
// 64 bits (the maximum, 32767 s, is about 3.3e13 ns and fits comfortably).
const int64_t timeout = (int64_t)jl_options.timeout_for_safepoint_straggler_s * 1000000000LL; // convert to nanoseconds
int ret = 0;
uv_mutex_lock(&safepoint_lock);
if (!jl_atomic_load_relaxed(&ptls2->gc_state)) {
ret = uv_cond_timedwait(&safepoint_cond_begin, &safepoint_lock, timeout);
}
uv_mutex_unlock(&safepoint_lock);
// If we woke up because of a timeout, print the backtrace of the straggler
if (ret == UV_ETIMEDOUT) {
jl_safe_printf("===== Thread %d failed to reach safepoint after %d seconds, printing backtrace below =====\n", ptls2->tid + 1, jl_options.timeout_for_safepoint_straggler_s);
// Try to record the backtrace of the straggler using `jl_try_record_thread_backtrace`
jl_ptls_t ptls = jl_current_task->ptls;
size_t bt_size = jl_try_record_thread_backtrace(ptls2, ptls->bt_data, JL_MAX_BT_SIZE);
// Print the backtrace of the straggler
for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(ptls->bt_data + i)) {
jl_print_bt_entry_codeloc(ptls->bt_data + i);
}
}
}
}
}
}
Expand Down
21 changes: 6 additions & 15 deletions src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,23 +310,14 @@ int exc_reg_is_write_fault(uintptr_t esr) {
#include <sys/eventfd.h>
#include <link.h>

typedef struct {
void (*f)(void*) JL_NOTSAFEPOINT;
void *ctx;
} callback_t;
static int with_dl_iterate_phdr_lock(struct dl_phdr_info *info, size_t size, void *data)
{
jl_lock_profile();
callback_t *callback = (callback_t*)data;
callback->f(callback->ctx);
jl_unlock_profile();
return 1; // only call this once
}

void jl_with_stackwalk_lock(void (*f)(void*), void *ctx)
{
callback_t callback = {f, ctx};
dl_iterate_phdr(with_dl_iterate_phdr_lock, &callback);
sigset_t sset, oset;
sigemptyset(&sset);
sigaddset(&sset, SIGUSR2);
pthread_sigmask(SIG_BLOCK, &sset, &oset);
f(ctx);
pthread_sigmask(SIG_SETMASK, &oset, NULL);
}

#if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_))
Expand Down
19 changes: 19 additions & 0 deletions src/stackwalk.c
Original file line number Diff line number Diff line change
Expand Up @@ -1260,6 +1260,25 @@ static void suspend(void *ctx)
suspenddata->success = jl_thread_suspend_and_get_state(suspenddata->old, 1, suspenddata->c);
}

// Attempt to capture a backtrace of the thread described by `ptls2`.
//
// The target thread is suspended (under the stackwalk lock) so that its
// register context can be captured, the stack is walked from that context into
// `bt_data` (at most `max_bt_size` elements), and the thread is then resumed.
//
// Returns the number of backtrace elements recorded, or 0 if the thread could
// not be suspended.
JL_DLLEXPORT size_t jl_try_record_thread_backtrace(jl_ptls_t ptls2, jl_bt_element_t *bt_data, size_t max_bt_size) JL_NOTSAFEPOINT
{
    int16_t target_tid = ptls2->tid;
    bt_context_t captured_ctx;
    suspend_t request = {target_tid, &captured_ctx};

    // Suspend the target and grab its context while holding the stackwalk lock.
    jl_with_stackwalk_lock(suspend, &request);
    if (!request.success)
        return 0;

    // The thread is stopped, so the task it was running can be read safely.
    jl_task_t *running_task = jl_atomic_load_relaxed(&ptls2->current_task);

    // The task's gcstack is only passed along when no previous_task is set.
    size_t recorded = rec_backtrace_ctx(bt_data, max_bt_size, &captured_ctx,
                                        ptls2->previous_task ? NULL : running_task->gcstack);

    jl_thread_resume(target_tid);
    return recorded;
}

JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size, int all_tasks_profiler) JL_NOTSAFEPOINT
{
int16_t tid = INT16_MAX;
Expand Down
Loading

0 comments on commit 39ad8cb

Please sign in to comment.