Optimize allocation of x regs in scheduler threads
Limit allocation to the actual number of registers required by loaded modules.
This helps keep the allocation low unless code requiring many registers is
used.

Signed-off-by: Paul Guyot <pguyot@kallisys.net>
pguyot committed Nov 19, 2023
1 parent 913d23d commit 1c99c77
Showing 2 changed files with 88 additions and 15 deletions.
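Before the diff, a rough sketch of the mechanism in plain C. The names ensure_xregs and module_info are illustrative stand-ins (the patch itself uses the ENSURE_XREGS macro and struct Module), and the realloc failure check is an assumption added for clarity rather than something the macro below does:

#include <stdint.h>
#include <stdlib.h>

typedef uintptr_t term; /* stand-in for AtomVM's term type */

/* Illustrative stand-in for struct Module: only the field added by this commit. */
struct module_info
{
    uint16_t max_xregs; /* highest x register index seen while loading the module, plus one */
};

/* Start with a small allocation; a few registers are always needed (e.g. for exceptions). */
#define DEFAULT_XREGS 3

/* Grow the x register array lazily when switching to a module that needs more registers. */
static term *ensure_xregs(term *x_regs, uint16_t *x_regs_count, const struct module_info *mod)
{
    if (mod->max_xregs > *x_regs_count) {
        term *grown = realloc(x_regs, mod->max_xregs * sizeof(term));
        if (grown == NULL) {
            return NULL; /* caller keeps the old array and handles the failure */
        }
        *x_regs_count = mod->max_xregs;
        return grown;
    }
    return x_regs;
}

In the patch, the loader pass records the highest x register index each decoded operand touches (mod->max_xregs = MAX(mod->max_xregs, reg_index + 1)), so a scheduler thread can start from DEFAULT_XREGS and only grows its array when it jumps into a module that actually uses more registers.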
1 change: 1 addition & 0 deletions src/libAtomVM/module.h
@@ -129,6 +129,7 @@ struct Module
int end_instruction_ii;

unsigned int free_literals_data : 1;
uint16_t max_xregs;

#ifndef AVM_NO_SMP
Mutex *mutex;
102 changes: 87 additions & 15 deletions src/libAtomVM/opcodesswitch.h
@@ -80,6 +80,16 @@
#define COMPACT_NBITS_VALUE 0x18

#ifdef IMPL_EXECUTE_LOOP
// Allocate initially this number of registers (suitable for exceptions)
#define DEFAULT_XREGS 3

#define ENSURE_XREGS(x_regs, x_regs_count, mod) \
if (mod->max_xregs > x_regs_count) { \
x_regs_count = mod->max_xregs; \
x_regs = realloc(x_regs, x_regs_count * sizeof(term)); \
}


#define SET_ERROR(error_type_atom) \
x_regs[0] = ERROR_ATOM; \
x_regs[1] = error_type_atom; \
@@ -164,8 +174,12 @@ typedef dreg_t dreg_gc_safe_t;
break; \
\
case COMPACT_ATOM: \
case COMPACT_XREG: \
case COMPACT_YREG: \
break; \
case COMPACT_XREG: { \
uint16_t reg_index = first_byte >> 4; \
mod->max_xregs = MAX(mod->max_xregs, reg_index + 1); \
} \
break; \
\
case COMPACT_EXTENDED: \
@@ -220,7 +234,12 @@ typedef dreg_t dreg_gc_safe_t;
} \
break; \
\
case COMPACT_LARGE_XREG: \
case COMPACT_LARGE_XREG: { \
uint16_t reg_index = (((first_byte & 0xE0) << 3) | *(decode_pc)++); \
mod->max_xregs = MAX(mod->max_xregs, reg_index + 1); \
} \
break; \
\
case COMPACT_LARGE_YREG: \
(decode_pc)++; \
break; \
@@ -256,11 +275,21 @@ typedef dreg_t dreg_gc_safe_t;
uint8_t reg_type = first_byte & 0xF; \
(dreg).reg_type = reg_type; \
switch (reg_type) { \
case COMPACT_XREG: \
case COMPACT_XREG: { \
uint16_t reg_index = first_byte >> 4; \
mod->max_xregs = MAX(mod->max_xregs, reg_index + 1); \
(dreg).index = reg_index; \
} \
break; \
case COMPACT_YREG: \
(dreg).index = first_byte >> 4; \
break; \
case COMPACT_LARGE_XREG: \
break; \
case COMPACT_LARGE_XREG: { \
uint16_t reg_index = (((first_byte & 0xE0) << 3) | *(decode_pc)++); \
mod->max_xregs = MAX(mod->max_xregs, reg_index + 1); \
(dreg).index = reg_index; \
} \
break; \
case COMPACT_LARGE_YREG: \
(dreg).index = (((first_byte & 0xE0) << 3) | *(decode_pc)++); \
break; \
Expand All @@ -280,10 +309,18 @@ typedef dreg_t dreg_gc_safe_t;
uint8_t first_byte = *(decode_pc)++; \
uint8_t reg_type = first_byte & 0xF; \
switch (reg_type) { \
case COMPACT_XREG: \
case COMPACT_XREG: { \
uint16_t reg_index = first_byte >> 4; \
mod->max_xregs = MAX(mod->max_xregs, reg_index + 1); \
} \
break; \
case COMPACT_YREG: \
break; \
case COMPACT_LARGE_XREG: \
case COMPACT_LARGE_XREG: { \
uint16_t reg_index = (((first_byte & 0xE0) << 3) | *(decode_pc)++); \
mod->max_xregs = MAX(mod->max_xregs, reg_index + 1); \
} \
break; \
case COMPACT_LARGE_YREG: \
(decode_pc)++; \
break; \
@@ -788,12 +825,13 @@ typedef struct
#define IS_EXTENDED_FP_REGISTER(decode_pc) \
(*decode_pc) == COMPACT_EXTENDED_FP_REGISTER

#define JUMP_TO_LABEL(module, label) \
if (module != mod) { \
prev_mod = mod; \
mod = module; \
code = mod->code->code; \
} \
#define JUMP_TO_LABEL(module, label) \
if (module != mod) { \
prev_mod = mod; \
mod = module; \
ENSURE_XREGS(x_regs, x_regs_count, mod); \
code = mod->code->code; \
} \
JUMP_TO_ADDRESS(mod->labels[label])

#ifndef TRACE_JUMP
@@ -919,11 +957,13 @@ typedef struct
if (module_index == prev_mod->module_index) { \
Module *t = mod; \
mod = prev_mod; \
ENSURE_XREGS(x_regs, x_regs_count, mod); \
prev_mod = t; \
code = mod->code->code; \
} else if (module_index != mod->module_index) { \
prev_mod = mod; \
mod = globalcontext_get_module_by_index(glb, module_index); \
ENSURE_XREGS(x_regs, x_regs_count, mod); \
code = mod->code->code; \
} \
pc = code + ((ctx->cp & 0xFFFFFF) >> 2); \
@@ -1580,7 +1620,8 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
const uint8_t *code;
Module *mod;
Module *prev_mod;
term *x_regs = malloc(MAX_REG * sizeof(term));
uint16_t x_regs_count = DEFAULT_XREGS;
term *x_regs = malloc(x_regs_count * sizeof(term));
const uint8_t *pc;
int remaining_reductions;
uint32_t live;
Expand All @@ -1595,6 +1636,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
return 0;
}
mod = ctx->saved_module;
ENSURE_XREGS(x_regs, x_regs_count, mod);
prev_mod = mod;
code = mod->code->code;
live = ctx->xregs_count;
Expand All @@ -1615,6 +1657,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
#ifdef IMPL_CODE_LOADER
TRACE("-- Loading code\n");
SMP_MODULE_LOCK(mod);
mod->max_xregs = 0;
const uint8_t *code = mod->code->code;
const uint8_t *pc = code;
uint32_t live;
@@ -4150,7 +4193,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)

dreg_t dreg;
DECODE_DEST_REGISTER(dreg, pc);

#ifdef IMPL_EXECUTE_LOOP
TRACE("bs_append/8, fail=%u size=%li unit=%u src=0x%lx dreg=%c%i\n", (unsigned) fail, size_val, (unsigned) unit, src, T_DEST_REG(dreg));
DECODE_COMPACT_TERM(src, src_pc)
@@ -4355,6 +4398,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
DECODE_LITERAL(live, pc);
term slots_term;
DECODE_COMPACT_TERM(slots_term, pc);
#ifdef IMPL_CODE_LOADER
// We use an additional x_regs to preserve src
mod->max_xregs = MAX(mod->max_xregs, live + 1);
#endif
#ifdef IMPL_EXECUTE_LOOP
int slots = term_to_int(slots_term);

@@ -4402,6 +4449,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
TRACE("bs_start_match3/4\n");
#endif

#ifdef IMPL_CODE_LOADER
// We use an additional x_regs to preserve src
mod->max_xregs = MAX(mod->max_xregs, live + 1);
#endif
#ifdef IMPL_EXECUTE_LOOP
// MEMORY_CAN_SHRINK because bs_start_match is classified as gc in beam_ssa_codegen.erl
#ifdef IMPL_EXECUTE_LOOP
@@ -4460,6 +4511,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
TRACE("bs_get_tail/3\n");
#endif

#ifdef IMPL_CODE_LOADER
// We use an additional x_regs to preserve src
mod->max_xregs = MAX(mod->max_xregs, live + 1);
#endif
#ifdef IMPL_EXECUTE_LOOP
VERIFY_IS_MATCH_STATE(src, "bs_get_tail");

@@ -5329,6 +5384,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
#endif
}

#ifdef IMPL_CODE_LOADER
// We use an additional x_regs to preserve src
mod->max_xregs = MAX(mod->max_xregs, live + 1);
#endif
#ifdef IMPL_EXECUTE_LOOP
//
// Maybe GC, and reset the src term in case it changed
@@ -5453,6 +5512,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
#endif
}

#ifdef IMPL_CODE_LOADER
// We use an additional x_regs to preserve src
mod->max_xregs = MAX(mod->max_xregs, live + 1);
#endif
#ifdef IMPL_EXECUTE_LOOP
//
// Maybe GC, and reset the src term in case it changed
@@ -6625,6 +6688,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
int unit;
DECODE_LITERAL(unit, pc);
j++;
#ifdef IMPL_CODE_LOADER
// We use an additional x_regs to preserve match_state
mod->max_xregs = MAX(mod->max_xregs, live + 1);
#endif
#ifdef IMPL_EXECUTE_LOOP
int matched_bits = size * unit;
if (bs_offset % 8 != 0 || matched_bits % 8 != 0) {
@@ -6660,6 +6727,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
int unit;
DECODE_LITERAL(unit, pc);
j++;
#ifdef IMPL_CODE_LOADER
// We use an additional x_regs to preserve match_state
mod->max_xregs = MAX(mod->max_xregs, live + 1);
#endif
#ifdef IMPL_EXECUTE_LOOP
size_t total_bits = term_binary_size(bs_bin) * 8;
size_t tail_bits = total_bits - bs_offset;
@@ -6765,6 +6836,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
{
int target_label = get_catch_label_and_change_module(ctx, &mod);
if (target_label) {
ENSURE_XREGS(x_regs, x_regs_count, mod);
code = mod->code->code;
JUMP_TO_ADDRESS(mod->labels[target_label]);
continue;