diff --git a/rts/motoko-rts/src/idl.rs b/rts/motoko-rts/src/idl.rs index b2936d7e470..18ddce2db40 100644 --- a/rts/motoko-rts/src/idl.rs +++ b/rts/motoko-rts/src/idl.rs @@ -4,7 +4,8 @@ use crate::buf::{read_byte, read_word, skip_leb128, Buf}; use crate::idl_trap_with; use crate::leb128::{leb128_decode, sleb128_decode}; use crate::memory::{alloc_blob, Memory}; -use crate::types::Words; +use crate::persistence::compatibility::TypeDescriptor; +use crate::types::{Value, Words}; use crate::utf8::utf8_validate; use core::cmp::min; @@ -1180,20 +1181,25 @@ unsafe extern "C" fn idl_sub_buf_init(rel_buf: *mut u32, typtbl_size1: u32, typt rel.init(); } -#[no_mangle] -unsafe extern "C" fn idl_sub( +#[ic_mem_fn] +unsafe fn idl_sub( + mem: &mut M, rel_buf: *mut u32, // a buffer with at least 2 * typtbl_size1 * typtbl_size2 bits typtbl1: *mut *mut u8, - typtbl2: *mut *mut u8, typtbl_end1: *mut u8, - typtbl_end2: *mut u8, typtbl_size1: u32, - typtbl_size2: u32, + candid_data2: Value, + type_offsets2: Value, t1: i32, t2: i32, ) -> bool { debug_assert!(rel_buf != (0 as *mut u32)); + let mut type_descriptor2 = TypeDescriptor::new(candid_data2, type_offsets2); + let typtbl2 = type_descriptor2.build_type_table(mem); + let typtbl_end2 = type_descriptor2.type_table_end(); + let typtbl_size2 = type_descriptor2.type_count() as u32; + let rel = BitRel { ptr: rel_buf, end: rel_buf.add(idl_sub_buf_words(typtbl_size1, typtbl_size2) as usize), diff --git a/rts/motoko-rts/src/persistence.rs b/rts/motoko-rts/src/persistence.rs index 9f740341e03..ed18a687c32 100644 --- a/rts/motoko-rts/src/persistence.rs +++ b/rts/motoko-rts/src/persistence.rs @@ -207,10 +207,9 @@ pub unsafe fn register_stable_type( mem: &mut M, new_candid_data: Value, new_type_offsets: Value, - new_actor_index: i32, ) { assert_eq!(new_candid_data.tag(), TAG_BLOB); - let mut new_type = TypeDescriptor::new(new_candid_data, new_type_offsets, new_actor_index); + let mut new_type = TypeDescriptor::new(new_candid_data, 
new_type_offsets); let metadata = PersistentMetadata::get(); let old_type = &mut (*metadata).stable_type; if !old_type.is_default() && !memory_compatible(mem, old_type, &mut new_type) { diff --git a/rts/motoko-rts/src/persistence/compatibility.rs b/rts/motoko-rts/src/persistence/compatibility.rs index 67ba50688df..028933c68ee 100644 --- a/rts/motoko-rts/src/persistence/compatibility.rs +++ b/rts/motoko-rts/src/persistence/compatibility.rs @@ -13,13 +13,21 @@ use crate::{ const DEFAULT_VALUE: Value = Value::from_scalar(0); +/// Relocatable static type descriptor used for Candid subtypes and program upgrade compatibility checks. +/// The descriptor consists of two blobs, one for the Candid type data and one denoting a vector of types. +/// The vector only stores relative offsets in the Candid data and does not hold any absolute addresses. +/// A type descriptor is used for two cases: +/// * To store the types of the previous program version in the persistent dynamic heap, with the GC +/// potentially moving the blobs. +/// * To load the types of the current program version from passive data segments, without absolute +/// addresses being known at compile time. +/// The static type table is only created when calling the Candid subtype or memory compatibility check. +/// As the static table contains absolute addresses, it can only be used temporarily until the next GC increment. pub struct TypeDescriptor { // Blob with candid-encoded type definitions. candid_data: Value, // Blob with a list of `u32` offsets referring to the `candid_data`. type_offsets: Value, - // Type index of the main actor to the compared for memory compatibility. 
- main_actor_index: i32, } impl TypeDescriptor { @@ -27,22 +35,24 @@ impl TypeDescriptor { Self { candid_data: DEFAULT_VALUE, type_offsets: DEFAULT_VALUE, - main_actor_index: 0, } } - pub unsafe fn new( - candid_data: Value, - type_offsets: Value, - main_actor_index: i32, - ) -> TypeDescriptor { + pub unsafe fn new(candid_data: Value, type_offsets: Value) -> Self { Self { candid_data: candid_data.forward_if_possible(), type_offsets: type_offsets.forward_if_possible(), - main_actor_index, } } + pub fn is_default(&self) -> bool { + self.candid_data == DEFAULT_VALUE && self.type_offsets == DEFAULT_VALUE + } + + pub fn assert_initialized(&self) { + assert!(self.candid_data != DEFAULT_VALUE && self.type_offsets != DEFAULT_VALUE); + } + // GC root if part of the persistent stable type pub fn candid_data_location(&mut self) -> *mut Value { &mut self.candid_data as *mut Value @@ -53,22 +63,11 @@ impl TypeDescriptor { &mut self.type_offsets as *mut Value } - pub fn is_default(&self) -> bool { - self.candid_data == DEFAULT_VALUE - && self.type_offsets == DEFAULT_VALUE - && self.main_actor_index == 0 - } - - pub fn assert_initialized(&self) { - assert!(self.candid_data != DEFAULT_VALUE && self.type_offsets != DEFAULT_VALUE); - } - - pub unsafe fn assign(&mut self, mem: &mut M, other: &TypeDescriptor) { + pub unsafe fn assign(&mut self, mem: &mut M, other: &Self) { let candid_data_location = &mut self.candid_data as *mut Value; write_with_barrier(mem, candid_data_location, other.candid_data); let type_offsets_location = &mut self.type_offsets as *mut Value; write_with_barrier(mem, type_offsets_location, other.type_offsets); - self.main_actor_index = other.main_actor_index; } pub unsafe fn type_count(&self) -> usize { @@ -131,6 +130,9 @@ unsafe fn create_type_check_cache( cache } +// Fix main actor type index, see `compile.ml`. +const MAIN_ACTOR_TYPE_INDEX: i32 = 0; + /// Test whether the new stable type complies with the existing old stable type. 
/// This uses the existing IDL subtype test. pub unsafe fn memory_compatible( @@ -153,8 +155,8 @@ pub unsafe fn memory_compatible( new_type_table, old_table_end, new_table_end, - old_type.main_actor_index, - new_type.main_actor_index, + MAIN_ACTOR_TYPE_INDEX, + MAIN_ACTOR_TYPE_INDEX, true, ) } diff --git a/src/codegen/compile.ml b/src/codegen/compile.ml index 566370ffc5b..b80772bb216 100644 --- a/src/codegen/compile.ml +++ b/src/codegen/compile.ml @@ -257,6 +257,11 @@ module E = struct type local_names = (int32 * string) list (* For the debug section: Names of locals *) type func_with_names = func * local_names type lazy_function = (int32, func_with_names) Lib.AllocOnUse.t + type type_descriptor = { + candid_data_segment : int32; + type_offsets_segment : int32; + idl_types_segment : int32; + } type t = { (* Global fields *) (* Static *) @@ -281,10 +286,7 @@ module E = struct named_imports : int32 NameEnv.t ref; built_in_funcs : lazy_function NameEnv.t ref; static_strings : int32 StringEnv.t ref; - end_of_static_memory : int32 ref; (* End of statically allocated memory *) - static_memory : (int32 * string) list ref; (* Content of static memory *) - static_memory_frozen : bool ref; - (* Sanity check: Nothing should bump end_of_static_memory once it has been read *) + data_segments : string list ref; (* Passive data segments *) static_variables : int32 ref; (* Number of static variables (MutBox), accessed by index via the runtime system, and belonging to the GC root set. *) @@ -314,11 +316,14 @@ module E = struct (* requires stable memory (and emulation on wasm targets) *) requires_stable_memory : bool ref; + + (* Type descriptor of current program version, created on `conclude_module`. 
*) + global_type_descriptor : type_descriptor option ref; } (* The initial global environment *) - let mk_global mode rts trap_with dyn_mem : t = { + let mk_global mode rts trap_with : t = { mode; rts; trap_with; @@ -334,9 +339,7 @@ module E = struct named_imports = ref NameEnv.empty; built_in_funcs = ref NameEnv.empty; static_strings = ref StringEnv.empty; - end_of_static_memory = ref dyn_mem; - static_memory = ref []; - static_memory_frozen = ref false; + data_segments = ref []; static_variables = ref 0l; typtbl_typs = ref []; (* Metadata *) @@ -351,6 +354,7 @@ module E = struct local_names = ref []; features = ref FeatureSet.empty; requires_stable_memory = ref false; + global_type_descriptor = ref None; } (* This wraps Mo_types.Hash.hash to also record which labels we have seen, @@ -538,17 +542,10 @@ module E = struct let then_trap_with env msg = G.if0 (trap_with env msg) G.nop let else_trap_with env msg = G.if0 G.nop (trap_with env msg) - let reserve_static_memory (env : t) size : int32 = - if !(env.static_memory_frozen) then raise (Invalid_argument "Static memory frozen"); - let ptr = !(env.end_of_static_memory) in - let aligned = Int32.logand (Int32.add size 3l) (Int32.lognot 3l) in - env.end_of_static_memory := Int32.add ptr aligned; - ptr - - let add_mutable_static_bytes (env : t) data : int32 = - let ptr = reserve_static_memory env (Int32.of_int (String.length data)) in - env.static_memory := !(env.static_memory) @ [ (ptr, data) ]; - Int32.(add ptr ptr_skew) (* Return a skewed pointer *) + let add_data_segment (env : t) data : int32 = + let index = List.length !(env.data_segments) in + env.data_segments := !(env.data_segments) @ [ data ]; + Int32.of_int index let add_fun_ptr (env : t) fi : int32 = match FunEnv.find_opt fi !(env.func_ptrs) with @@ -568,33 +565,36 @@ module E = struct let add_static (env : t) (data : StaticBytes.t) : int32 = let b = StaticBytes.as_bytes data in match StringEnv.find_opt b !(env.static_strings) with - | Some ptr -> ptr + | 
Some segment_index -> segment_index | None -> - let ptr = add_mutable_static_bytes env b in - env.static_strings := StringEnv.add b ptr !(env.static_strings); - ptr - - let add_static_unskewed (env : t) (data : StaticBytes.t) : int32 = - Int32.add (add_static env data) ptr_unskew - - let get_end_of_static_memory env : int32 = - env.static_memory_frozen := true; - !(env.end_of_static_memory) + let segment_index = add_data_segment env b in + env.static_strings := StringEnv.add b segment_index !(env.static_strings); + segment_index + + let replace_data_segment (env : t) (segment_index : int32) (data : StaticBytes.t) : int32 = + let new_value = StaticBytes.as_bytes data in + let segment_index = Int32.to_int segment_index in + assert (segment_index < List.length !(env.data_segments)); + env.data_segments := List.mapi (fun index old_value -> + if index = segment_index then + (assert (old_value = ""); + new_value) + else + old_value + ) !(env.data_segments); + Int32.of_int (String.length new_value) + + let get_data_segments (env : t) = + !(env.data_segments) let add_static_variable (env : t) : int32 = - let index = !(env.static_variables) in - env.static_variables := Int32.add index 1l; - index + let variable_index = !(env.static_variables) in + env.static_variables := Int32.add variable_index 1l; + variable_index let count_static_variables (env : t) = !(env.static_variables) - let get_static_memory env = - !(env.static_memory) - - let mem_size env = - Int32.(add (div (get_end_of_static_memory env) page_size) 1l) - let collect_garbage env force = let name = "incremental_gc" in let gc_fn = if force || !Flags.force_gc then name else "schedule_" ^ name in @@ -628,8 +628,8 @@ module E = struct let requires_stable_memory (env : t) = !(env.requires_stable_memory) - let get_memories (env : t) = - nr {mtype = MemoryType {min = mem_size env; max = None}} + let get_memories (env : t) initial_memory_pages = + nr {mtype = MemoryType {min = initial_memory_pages; max = None}} :: match 
mode env with | Flags.WASIMode | Flags.WasmMode when !(env.requires_stable_memory) -> @@ -942,7 +942,7 @@ module RTS = struct E.add_func_import env "rts" "write_with_barrier" [I32Type; I32Type] []; E.add_func_import env "rts" "allocation_barrier" [I32Type] [I32Type]; E.add_func_import env "rts" "running_gc" [] [I32Type]; - E.add_func_import env "rts" "register_stable_type" [I32Type; I32Type; I32Type] []; + E.add_func_import env "rts" "register_stable_type" [I32Type; I32Type] []; E.add_func_import env "rts" "load_stable_actor" [] [I32Type]; E.add_func_import env "rts" "save_stable_actor" [I32Type] []; E.add_func_import env "rts" "free_stable_actor" [] []; @@ -959,7 +959,7 @@ module RTS = struct E.add_func_import env "rts" "idl_sub_buf_words" [I32Type; I32Type] [I32Type]; E.add_func_import env "rts" "idl_sub_buf_init" [I32Type; I32Type; I32Type] []; E.add_func_import env "rts" "idl_sub" - [I32Type; I32Type; I32Type; I32Type; I32Type; I32Type; I32Type; I32Type; I32Type] [I32Type]; + [I32Type; I32Type; I32Type; I32Type; I32Type; I32Type; I32Type; I32Type] [I32Type]; E.add_func_import env "rts" "leb128_decode" [I32Type] [I32Type]; E.add_func_import env "rts" "sleb128_decode" [I32Type] [I32Type]; E.add_func_import env "rts" "bigint_of_word32" [I32Type] [I32Type]; @@ -3338,12 +3338,10 @@ module Blob = struct BigNum.from_word32 env ) - let static_data env s = - compile_unboxed_const (Int32.add ptr_unskew (E.add_static env StaticBytes.[Bytes s])) - - let lit_ptr_len env s = - static_data env s ^^ - compile_unboxed_const (Int32.of_int (String.length s)) + (* unskewed target address, data offset, and length on stack *) + let load_static_data env s = + let segment_index = E.add_static env StaticBytes.[Bytes s] in + G.i (MemoryInit (nr segment_index)) let alloc env = E.call_import env "rts" "alloc_blob" ^^ @@ -3361,9 +3359,9 @@ module Blob = struct let (set_new_blob, get_new_blob) = new_local env "new_blob" in compile_unboxed_const blob_length ^^ alloc env ^^ set_new_blob ^^ 
get_new_blob ^^ payload_ptr_unskewed env ^^ (* target address *) - static_data env s ^^ (* source address *) - compile_unboxed_const blob_length ^^ (* copy length *) - Heap.memcpy env ^^ + compile_unboxed_const 0l ^^ (* data offset *) + compile_unboxed_const blob_length ^^ (* data length *) + load_static_data env s ^^ get_new_blob let as_ptr_len env = Func.share_code1 Func.Never env "as_ptr_size" ("x", I32Type) [I32Type; I32Type] ( @@ -3372,6 +3370,20 @@ module Blob = struct get_x ^^ len env ) + let lit_ptr_len env s = + lit env s ^^ + as_ptr_len env + + let load_data_segment env segment_index data_length = + let (set_blob, get_blob) = new_local env "data_segment_blob" in + data_length ^^ + alloc env ^^ set_blob ^^ + get_blob ^^ payload_ptr_unskewed env ^^ (* target address *) + compile_unboxed_const 0l ^^ (* data offset *) + data_length ^^ + G.i (MemoryInit (nr segment_index)) ^^ + get_blob + let of_ptr_size env = Func.share_code2 Func.Always env "blob_of_ptr_size" (("ptr", I32Type), ("size" , I32Type)) [I32Type] ( fun env get_ptr get_size -> let (set_x, get_x) = new_local env "x" in @@ -5715,26 +5727,44 @@ module MakeSerialization (Strm : Stream) = struct *) module Strm = Strm - - (* Globals recording known Candid types - See Note [Candid subtype checks] - *) - - let register_delayed_globals env = - (E.add_global32_delayed env "__typtbl" Immutable, - E.add_global32_delayed env "__typtbl_end" Immutable, - E.add_global32_delayed env "__typtbl_size" Immutable, - E.add_global32_delayed env "__typtbl_idltyps" Immutable) - - let get_typtbl env = - G.i (GlobalGet (nr (E.get_global env "__typtbl"))) - let get_typtbl_size env = - G.i (GlobalGet (nr (E.get_global env "__typtbl_size"))) - let get_typtbl_end env = - G.i (GlobalGet (nr (E.get_global env "__typtbl_end"))) - let get_typtbl_idltyps env = - G.i (GlobalGet (nr (E.get_global env "__typtbl_idltyps"))) - + (* Globals recording known Candid types + See Note [Candid subtype checks] + *) + let register_delayed_globals 
env = + (E.add_global32_delayed env "__candid_data_length" Immutable, + E.add_global32_delayed env "__type_offsets_length" Immutable, + E.add_global32_delayed env "__idl_types_length" Immutable) + + let get_candid_data_length env = + G.i (GlobalGet (nr (E.get_global env "__candid_data_length"))) + let get_type_offsets_length env = + G.i (GlobalGet (nr (E.get_global env "__type_offsets_length"))) + let get_idl_types_length env = + G.i (GlobalGet (nr (E.get_global env "__idl_types_length"))) + + let candid_type_offset_size = 4l + + let get_global_type_descriptor env = + match !(E.(env.global_type_descriptor)) with + | Some descriptor -> descriptor + | None -> assert false + + let load_candid_data env = + let descriptor = get_global_type_descriptor env in + Blob.load_data_segment env E.(descriptor.candid_data_segment) (get_candid_data_length env) + + let load_type_offsets env = + let descriptor = get_global_type_descriptor env in + Blob.load_data_segment env E.(descriptor.type_offsets_segment) (get_type_offsets_length env) + + let count_type_offsets env = + get_type_offsets_length env ^^ + compile_divU_const candid_type_offset_size + + let load_idl_types env = + let descriptor = get_global_type_descriptor env in + Blob.load_data_segment env E.(descriptor.idl_types_segment) (get_idl_types_length env) + module Registers = struct let register_globals env = E.add_global32 env "@@rel_buf_opt" Mutable 0l; @@ -5996,22 +6026,29 @@ module MakeSerialization (Strm : Stream) = struct List.map idx ts) (* See Note [Candid subtype checks] *) - let set_delayed_globals (env : E.t) (set_typtbl, set_typtbl_end, set_typtbl_size, set_typtbl_idltyps) = - let typdesc, offsets, idltyps = type_desc env Candid (E.get_typtbl_typs env) in - let static_typedesc = E.add_static_unskewed env [StaticBytes.Bytes typdesc] in - let static_typtbl = - let bytes = StaticBytes.i32s - (List.map (fun offset -> - Int32.(add static_typedesc (of_int(offset)))) - offsets) - in - E.add_static_unskewed env [bytes] 
- in - let static_idltyps = E.add_static_unskewed env [StaticBytes.i32s idltyps] in - set_typtbl static_typtbl; - set_typtbl_end Int32.(add static_typedesc (of_int (String.length typdesc))); - set_typtbl_size (Int32.of_int (List.length offsets)); - set_typtbl_idltyps static_idltyps + let reserve_global_type_descriptor (env : E.t) = + let candid_data_segment = E.add_data_segment env "" in + let type_offsets_segment = E.add_data_segment env "" in + let idl_types_segment = E.add_data_segment env "" in + E.(env.global_type_descriptor := Some { + candid_data_segment; + type_offsets_segment; + idl_types_segment; + }) + + let create_global_type_descriptor (env : E.t) (set_candid_data_length, set_type_offsets_length, set_idl_types_length) = + let descriptor = get_global_type_descriptor env in + let candid_data, type_offsets, idl_types = type_desc env Candid (E.get_typtbl_typs env) in + let candid_data_binary = [StaticBytes.Bytes candid_data] in + let candid_data_length = E.replace_data_segment env E.(descriptor.candid_data_segment) candid_data_binary in + set_candid_data_length candid_data_length; + let type_offsets_binary = [StaticBytes.i32s (List.map Int32.of_int type_offsets)] in + let type_offsets_length = E.replace_data_segment env E.(descriptor.type_offsets_segment) type_offsets_binary in + set_type_offsets_length type_offsets_length; + let idl_types_binary = [StaticBytes.i32s idl_types] in + let idl_types_length = E.replace_data_segment env E.(descriptor.idl_types_segment) idl_types_binary in + set_idl_types_length idl_types_length + (* Returns data (in bytes) and reference buffer size (in entries) needed *) let rec buffer_size env t = @@ -6360,18 +6397,19 @@ module MakeSerialization (Strm : Stream) = struct if extended then f (compile_unboxed_const 0l) else - get_typtbl_size1 ^^ get_typtbl_size env ^^ + get_typtbl_size1 ^^ count_type_offsets env ^^ E.call_import env "rts" "idl_sub_buf_words" ^^ Stack.dynamic_with_words env "rel_buf" (fun get_ptr -> - get_ptr ^^ 
get_typtbl_size1 ^^ get_typtbl_size env ^^ + get_ptr ^^ get_typtbl_size1 ^^ count_type_offsets env ^^ E.call_import env "rts" "idl_sub_buf_init" ^^ f get_ptr) (* See Note [Candid subtype checks] *) let idl_sub env t2 = let idx = E.add_typtbl_typ env t2 in - get_typtbl_idltyps env ^^ - G.i (Load {ty = I32Type; align = 0; offset = Int32.mul idx 4l (*!*); sz = None}) ^^ + load_idl_types env ^^ + Blob.payload_ptr_unskewed env ^^ + G.i (Load {ty = I32Type; align = 0; offset = Int32.mul idx candid_type_offset_size (*!*); sz = None}) ^^ Func.share_code6 Func.Always env ("idl_sub") (("rel_buf", I32Type), ("typtbl1", I32Type), @@ -6386,11 +6424,10 @@ module MakeSerialization (Strm : Stream) = struct E.else_trap_with env "null rel_buf" ^^ get_rel_buf ^^ get_typtbl1 ^^ - get_typtbl env ^^ get_typtbl_end1 ^^ - get_typtbl_end env ^^ get_typtbl_size1 ^^ - get_typtbl_size env ^^ + load_candid_data env ^^ + load_type_offsets env ^^ get_idltyp1 ^^ get_idltyp2 ^^ E.call_import env "rts" "idl_sub") @@ -7388,12 +7425,18 @@ encountered during code generation, the other is determined dynamically by, e.g. message payload. The latter will vary with each payload to decode. +The static type table and a type descriptor are stored in passive +data segments. Instead of absolute memory addresses, the static type +table in the data segment only contains relative offsets into type +descriptor. When loaded, these offsets are patched by static addresses +that point into the type descriptor. + The known Motoko types are accumulated in a global list as required and then, in a final compilation step, encoded to global type table -and sequence of type indices. The encoding is stored as static -data referenced by dedicated wasm globals so that we can generate -code that references the globals before their final definitions are -known. +and the type descriptor (sequence of type indices). 
The encoding is +stored in passive data segments referenced (by way of segment indices) +from dedicated wasm globals so that we can generate code that +references the globals before their final definitions are known. Deserializing a proper (not extended) Candid value stack allocates a mutable word buffer, of size determined by `idl_sub_buf_words`. @@ -7757,10 +7800,9 @@ module Persistence = struct let register_stable_type env actor_type = let (candid_type_desc, type_offsets, type_indices) = Serialization.(type_desc env Persistence [actor_type]) in let serialized_offsets = StaticBytes.(as_bytes [i32s (List.map Int32.of_int type_offsets)]) in - assert ((List.length type_indices) = 1); + assert (type_indices = [0l]); Blob.lit env candid_type_desc ^^ Blob.lit env serialized_offsets ^^ - compile_unboxed_const (List.nth type_indices 0) ^^ E.call_import env "rts" "register_stable_type" let create_actor env actor_type get_field_value = @@ -11705,7 +11747,7 @@ and conclude_module env set_serialization_globals start_fi_o = FuncDec.export_gc_trigger_method env; (* See Note [Candid subtype checks] *) - Serialization.set_delayed_globals env set_serialization_globals; + Serialization.create_global_type_descriptor env set_serialization_globals; (* declare before building GC *) @@ -11717,7 +11759,8 @@ and conclude_module env set_serialization_globals start_fi_o = Heap.register env; IC.register env; - set_heap_base (E.get_end_of_static_memory env); + let dynamic_heap_start = Lifecycle.end_ () in + set_heap_base dynamic_heap_start; (* Wrap the start function with the RTS initialization *) let rts_start_fi = E.add_fun env "rts_start" (Func.of_body env [] [] (fun env1 -> @@ -11738,15 +11781,15 @@ and conclude_module env set_serialization_globals start_fi_o = let other_imports = E.get_other_imports env in - let memories = E.get_memories env in + let initial_memory_pages = Int32.(add (div dynamic_heap_start page_size) 1l) in + let memories = E.get_memories env initial_memory_pages in 
let funcs = E.get_funcs env in - let data = List.map (fun (offset, init) -> nr { - index = nr 0l; - offset = nr (G.to_instr_list (compile_unboxed_const offset)); - init; - }) (E.get_static_memory env) in + let datas = List.map (fun (dinit) -> nr { + dinit; + dmode = (nr Wasm_exts.Ast.Passive); + }) (E.get_data_segments env) in let elems = List.map (fun (fi, fp) -> nr { index = nr 0l; @@ -11766,7 +11809,7 @@ and conclude_module env set_serialization_globals start_fi_o = memories; imports = func_imports @ other_imports; exports = E.get_exports env; - data + datas } in let emodule = @@ -11795,7 +11838,7 @@ and conclude_module env set_serialization_globals start_fi_o = | Some rts -> Linking.LinkModule.link emodule "rts" rts let compile mode rts (prog : Ir.prog) : Wasm_exts.CustomModule.extended_module = - let env = E.mk_global mode rts IC.trap_with (Lifecycle.end_ ()) in + let env = E.mk_global mode rts IC.trap_with in IC.register_globals env; Stack.register_globals env; @@ -11805,7 +11848,8 @@ let compile mode rts (prog : Ir.prog) : Wasm_exts.CustomModule.extended_module = (* See Note [Candid subtype checks] *) let set_serialization_globals = Serialization.register_delayed_globals env in - + Serialization.reserve_global_type_descriptor env; + IC.system_imports env; RTS.system_imports env; diff --git a/src/linking/linkModule.ml b/src/linking/linkModule.ml index 7092c9382d7..4ff2879b139 100644 --- a/src/linking/linkModule.ml +++ b/src/linking/linkModule.ml @@ -382,16 +382,23 @@ let rename_globals rn : module_' -> module_' = fun m -> let table_segment = phrase (table_segment') in let table_segments = List.map table_segment in - let memory_segment' (s : string segment') = { s with offset = const s.offset; } in - let memory_segment = phrase (memory_segment') in - let memory_segments = List.map memory_segment in + let segment_mode' (dmode : segment_mode') = + match dmode with + | Passive -> Passive + | Active { index; offset } -> Active { index; offset = const offset } + | 
Declarative -> Declarative + in + let segment_mode = phrase (segment_mode') in + let data_segment' (s : data_segment') = { s with dmode = segment_mode s.dmode; } in + let data_segment = phrase (data_segment') in + let data_segments = List.map data_segment in { m with funcs = funcs m.funcs; globals = globals m.globals; elems = table_segments m.elems; - data = memory_segments m.data; + datas = data_segments m.datas; } let set_global global value = fun m -> @@ -433,16 +440,23 @@ let fill_global (global : int32) (value : int32) : module_' -> module_' = fun m let table_segment = phrase (table_segment') in let table_segments = List.map table_segment in - let memory_segment' (s : string segment') = { s with offset = const s.offset; } in - let memory_segment = phrase (memory_segment') in - let memory_segments = List.map memory_segment in + let segment_mode' (dmode : segment_mode') = + match dmode with + | Passive -> Passive + | Active { index; offset } -> Active { index; offset = const offset } + | Declarative -> Declarative + in + let segment_mode = phrase (segment_mode') in + let data_segment' (s : data_segment') = { s with dmode = segment_mode s.dmode; } in + let data_segment = phrase (data_segment') in + let data_segments = List.map data_segment in { m with funcs = funcs m.funcs; globals = globals m.globals; elems = table_segments m.elems; - data = memory_segments m.data; + datas = data_segments m.datas; } let rename_funcs_name_section rn (ns : name_section) = @@ -599,7 +613,7 @@ let join_modules funcs = m1.funcs @ m2.funcs; start = m1.start; elems = m1.elems @ m2.elems; - data = m1.data @ m2.data; + datas = m1.datas @ m2.datas; imports = m1.imports @ m2.imports; exports = m1.exports @ m2.exports; }; diff --git a/src/wasm-exts/ast.ml b/src/wasm-exts/ast.ml index d13c26f7a27..60b4d161d3f 100644 --- a/src/wasm-exts/ast.ml +++ b/src/wasm-exts/ast.ml @@ -7,6 +7,7 @@ Base revision: WebAssembly/spec@a7a1856. 
The changes are: * Pseudo-instruction Meta for debug information * StableMemory, StableGrow, StableRead, StableWrite instructions. + * Support for passive data segments (incl. `MemoryInit`). The code is otherwise as untouched as possible, so that we can relatively easily apply diffs from the original code (possibly manually). @@ -111,6 +112,7 @@ and instr' = | Store of storeop (* write memory at address *) | MemorySize (* size of linear memory *) | MemoryGrow (* grow linear memory *) + | MemoryInit of var (* initialize memory range from segment *) | Const of literal (* constant *) | Test of testop (* numeric test *) | Compare of relop (* numeric comparison *) @@ -173,8 +175,21 @@ and 'data segment' = } type table_segment = var list segment -type memory_segment = string segment +(* Manual extension to support passive data segments *) +type segment_mode = segment_mode' phrase +and segment_mode' = + | Passive + | Active of {index : var; offset : const} + | Declarative + +type data_segment = data_segment' phrase +and data_segment' = +{ + dinit : string; + dmode : segment_mode; +} +(* End of manual extension *) (* Modules *) @@ -219,7 +234,9 @@ and module_' = funcs : func list; start : var option; elems : var list segment list; - data : string segment list; + (* Manual adjustment for passive data segment support *) + datas : data_segment list; + (* End of manual adjustment *) imports : import list; exports : export list; } @@ -236,7 +253,7 @@ let empty_module = funcs = []; start = None; elems = []; - data = []; + datas = []; imports = []; exports = []; } diff --git a/src/wasm-exts/customModuleDecode.ml b/src/wasm-exts/customModuleDecode.ml index 5993362c757..78226bee53f 100644 --- a/src/wasm-exts/customModuleDecode.ml +++ b/src/wasm-exts/customModuleDecode.ml @@ -4,6 +4,7 @@ reference implementation. The changes are: * Support for additional custom sections + * Support for passive data segments (incl. `MemoryInit`). 
The code is otherwise as untouched as possible, so that we can relatively easily apply diffs from the original code (possibly manually). @@ -220,6 +221,7 @@ let var s = vu32 s let op s = u8 s let end_ s = expect 0x0b s "END opcode expected" +let zero s = expect 0x00 s "zero byte expected" let memop s = let align = vu32 s in @@ -244,6 +246,11 @@ let math_prefix s = | 0x05 -> i64_trunc_sat_f32_u | 0x06 -> i64_trunc_sat_f64_s | 0x07 -> i64_trunc_sat_f64_u + (* Manual extension for passive data segments *) + | 0x08 -> + let x = at var s in + zero s; memory_init x + (* End of manual extension *) | b -> illegal s pos b let rec instr s = @@ -521,6 +528,7 @@ let id s = | 9 -> `ElemSection | 10 -> `CodeSection | 11 -> `DataSection + | 12 -> `DataCountSection | _ -> error s (pos s) "malformed section id" ) bo @@ -647,6 +655,21 @@ let code_section s = (* Element section *) +(* Manual extension for passive data segments *) +let passive s = + Passive + +let active s = + let index = at var s in + let offset = const s in + Active {index; offset} + +let active_zero s = + let index = Source.(0l @@ no_region) in + let offset = const s in + Active {index; offset} +(* End of manual extension *) + let segment dat s = let index = at var s in let offset = const s in @@ -660,14 +683,37 @@ let elem_section s = section `ElemSection (vec (at table_segment)) [] s +(* Manual extension for passive data segments *) (* Data section *) -let memory_segment s = - segment string s +let data s = + match vu32 s with + | 0x00l -> + let dmode = at active_zero s in + let dinit = string s in + {dinit; dmode} + | 0x01l -> + let dmode = at passive s in + let dinit = string s in + {dinit; dmode} + | 0x02l -> + let dmode = at active s in + let dinit = string s in + {dinit; dmode} + | _ -> error s (pos s - 1) "malformed data segment kind" let data_section s = - section `DataSection (vec (at memory_segment)) [] s + section `DataSection (vec (at data)) [] s + + +(* DataCount section *) +let data_count s = + Some 
(vu32 s) + +let data_count_section s = + section `DataCountSection data_count None s +(* End of manual extension *) (* Custom sections *) @@ -896,9 +942,11 @@ let module_ s = iterate skip_custom_section s; let elems = elem_section s in iterate skip_custom_section s; + let data_count = data_count_section s in + iterate skip_custom_section s; let func_bodies = code_section s in iterate skip_custom_section s; - let data = data_section s in + let datas = data_section s in iterate skip_custom_section s; let name = name_section s in iterate skip_custom_section s; @@ -912,12 +960,14 @@ let module_ s = require (pos s = len s) s (len s) "junk after last section"; require (List.length func_types = List.length func_bodies) s (len s) "function and code section have inconsistent lengths"; + require (data_count = None || data_count = Some (Lib.List32.length datas)) + s (len s) "data count and data section have inconsistent lengths"; let funcs = List.map2 Source.(fun t f -> {f.it with ftype = t} @@ f.at) func_types func_bodies in { module_ = - {types; tables; memories; globals; funcs; imports; exports; elems; data; start}; + {types; tables; memories; globals; funcs; imports; exports; elems; datas; start}; dylink; name; motoko; diff --git a/src/wasm-exts/customModuleEncode.ml b/src/wasm-exts/customModuleEncode.ml index 5e2c5feacdd..b5366409b30 100644 --- a/src/wasm-exts/customModuleEncode.ml +++ b/src/wasm-exts/customModuleEncode.ml @@ -5,6 +5,7 @@ reference implementation. The changes are: * Support for writing out a source map for the Code parts * Support for additional custom sections + * Support for passive data segments (incl. `MemoryInit`). The code is otherwise as untouched as possible, so that we can relatively easily apply diffs from the original code (possibly manually). 
@@ -471,6 +472,10 @@ let encode (em : extended_module) = | MemorySize -> op 0x3f; u8 0x00 | MemoryGrow -> op 0x40; u8 0x00 + (* Manual extension for passive data segments *) + | MemoryInit x -> op 0xfc; vu32 0x08l; var x; u8 0x00 + (* End of manual extension *) + | Const {it = I32 c; _} -> op 0x41; vs32 c | Const {it = I64 c; _} -> op 0x42; vs64 c | Const {it = F32 c; _} -> op 0x43; f32 c @@ -804,12 +809,29 @@ let encode (em : extended_module) = let elem_section elems = section 9 (vec table_segment) elems (elems <> []) + (* Manual extension for passive data segments *) (* Data section *) - let memory_segment seg = - segment string seg - let data_section data = - section 11 (vec memory_segment) data (data <> []) + let data seg = + let {dinit; dmode} = seg.it in + match dmode.it with + | Passive -> + vu32 0x01l; string dinit + | Active {index; offset} when index.it = 0l -> + vu32 0x00l; const offset; string dinit + | Active {index; offset} -> + vu32 0x02l; var index; const offset; string dinit + | Declarative -> + failwith "illegal declarative data segment" + + let data_section datas = + section 11 (vec data) datas (datas <> []) + + (* Data count section *) + + let data_count_section datas m = + section 12 len (List.length datas) (datas <> []) + (* End of manual extension *) (* sourceMappingURL section *) @@ -1229,8 +1251,9 @@ let encode (em : extended_module) = export_section m.exports; start_section m.start; elem_section m.elems; + data_count_section m.datas m; code_section m.funcs; - data_section m.data; + data_section m.datas; (* other optional sections *) name_section em.name; candid_sections em.candid; diff --git a/src/wasm-exts/operators.ml b/src/wasm-exts/operators.ml index 2c35fe6aa20..7e000e1ae91 100644 --- a/src/wasm-exts/operators.ml +++ b/src/wasm-exts/operators.ml @@ -7,7 +7,7 @@ that it got basically replicated into the customModuleDecode.ml file. Base revision: WebAssembly/spec@a7a1856. 
The changes are: - * None for now + * Support for passive data segments (incl. `MemoryInit`). The code is otherwise as untouched as possible, so that we can relatively easily apply diffs from the original code (possibly manually). @@ -230,3 +230,6 @@ let f64_reinterpret_i64 = Convert (F64 F64Op.ReinterpretInt) let memory_size = MemorySize let memory_grow = MemoryGrow +(* Manual extension for passive data segments *) +let memory_init x = MemoryInit x +(* End of manual extension *)