diff --git a/nova_vm/src/ecmascript/types/language/string.rs b/nova_vm/src/ecmascript/types/language/string.rs index fcfbc1b8e..6cf830ffa 100644 --- a/nova_vm/src/ecmascript/types/language/string.rs +++ b/nova_vm/src/ecmascript/types/language/string.rs @@ -29,6 +29,7 @@ use crate::{ }; pub use data::StringHeapData; +use wtf8::Wtf8Buf; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] @@ -322,11 +323,32 @@ impl<'a> String<'a> { Empty, ExistingString(HeapString<'a>), SmallString { data: [u8; 7], len: usize }, - String(std::string::String), + String(Wtf8Buf), + } + let strings = strings.as_ref(); + let mut status = if strings.len() > 1 { + let len = strings.iter().fold(0usize, |a, s| a + s.len(agent)); + if len > 7 { + Status::String(Wtf8Buf::with_capacity(len)) + } else { + Status::Empty + } + } else { + Status::Empty + }; + + fn push_string_to_wtf8(agent: &Agent, buf: &mut Wtf8Buf, string: String) { + match string { + String::String(heap_string) => { + buf.push_wtf8(agent[heap_string].as_wtf8()); + } + String::SmallString(small_string) => { + buf.push_str(small_string.as_str()); + } + } } - let mut status = Status::Empty; - for string in strings.as_ref() { + for string in strings { if string.is_empty_string() { continue; } @@ -341,11 +363,12 @@ impl<'a> String<'a> { String::String(idx) => Status::ExistingString(*idx), }; } - Status::ExistingString(idx) => { + Status::ExistingString(heap_string) => { + let heap_string = *heap_string; let mut result = - std::string::String::with_capacity(agent[*idx].len() + string.len(agent)); - result.push_str(agent[*idx].as_str()); - result.push_str(string.as_str(agent)); + Wtf8Buf::with_capacity(agent[heap_string].len() + string.len(agent)); + result.push_wtf8(agent[heap_string].as_wtf8()); + push_string_to_wtf8(agent, &mut result, *string); status = Status::String(result) } Status::SmallString { data, len } => { @@ -358,15 +381,15 @@ impl<'a> String<'a> { .copy_from_slice(&smstr.data()[..string_len]); *len += string_len; } else { - let mut result = std::string::String::with_capacity(*len + string_len); + let mut result = Wtf8Buf::with_capacity(*len + string_len); // SAFETY: Since SmallStrings are guaranteed UTF-8, `&data[..len]` is the result // of concatenating UTF-8 strings, which is always valid UTF-8. result.push_str(unsafe { std::str::from_utf8_unchecked(&data[..*len]) }); - result.push_str(string.as_str(agent)); + push_string_to_wtf8(agent, &mut result, *string); status = Status::String(result); } } - Status::String(buffer) => buffer.push_str(string.as_str(agent)), + Status::String(buffer) => push_string_to_wtf8(agent, buffer, *string), } } @@ -379,7 +402,7 @@ impl<'a> String<'a> { let str_slice = unsafe { std::str::from_utf8_unchecked(&data[..len]) }; SmallString::from_str_unchecked(str_slice).into() } - Status::String(string) => agent.heap.create(string).bind(gc), + Status::String(string) => agent.heap.create(string.into_string().unwrap()).bind(gc), } } @@ -559,9 +582,8 @@ impl Scoped<'_, String<'static>> { } } -impl CreateHeapData> for Heap { - fn create(&mut self, data: StringHeapData) -> String<'static> { - let hash = self.string_hasher.hash_one(data.as_str()); +impl CreateHeapData<(StringHeapData, u64), String<'static>> for Heap { + fn create(&mut self, (data, hash): (StringHeapData, u64)) -> String<'static> { self.strings.push(Some(data)); let index = StringIndex::last(&self.strings); let heap_string = HeapString(index); diff --git a/nova_vm/src/ecmascript/types/language/string/data.rs b/nova_vm/src/ecmascript/types/language/string/data.rs index d393783ca..452911f91 100644 --- a/nova_vm/src/ecmascript/types/language/string/data.rs +++ b/nova_vm/src/ecmascript/types/language/string/data.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::{cell::OnceCell, num::NonZeroUsize}; +use std::{cell::OnceCell, hash::Hash, num::NonZeroUsize}; use wtf8::{Wtf8, Wtf8Buf}; @@ -24,7 +24,12 @@ impl PartialEq for StringHeapData { return true; } } - self.as_str() == other.as_str() + match (&self.data, &other.data) { + (StringBuffer::Owned(a), StringBuffer::Owned(b)) => a == b, + (StringBuffer::Owned(a), StringBuffer::Static(b)) => a == b, + (StringBuffer::Static(a), StringBuffer::Owned(b)) => a == b, + (StringBuffer::Static(a), StringBuffer::Static(b)) => a == b, + } } } impl Eq for StringHeapData {} @@ -47,6 +52,15 @@ pub(crate) enum StringBuffer { Static(&'static Wtf8), } +impl Hash for StringBuffer { + fn hash(&self, state: &mut H) { + match self { + StringBuffer::Owned(wtf8_buf) => wtf8_buf.hash(state), + StringBuffer::Static(wtf8) => wtf8.hash(state), + } + } +} + impl StringHeapData { /// The maximum UTf-16 length of a JS string, according to the spec (2^53 - 1). const MAX_UTF16_LENGTH: usize = (1 << 53) - 1; @@ -213,6 +227,13 @@ impl StringHeapData { } } + pub fn as_wtf8(&self) -> &Wtf8 { + match &self.data { + StringBuffer::Owned(buf) => buf, + StringBuffer::Static(buf) => buf, + } + } + pub fn from_str(str: &str) -> Self { debug_assert!(str.len() > 7); assert!(str.len() <= Self::MAX_UTF8_LENGTH, "String is too long."); diff --git a/nova_vm/src/ecmascript/types/language/value.rs b/nova_vm/src/ecmascript/types/language/value.rs index f74544cc2..bedd85df3 100644 --- a/nova_vm/src/ecmascript/types/language/value.rs +++ b/nova_vm/src/ecmascript/types/language/value.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use wtf8::Wtf8; + use super::{ bigint::{HeapBigInt, SmallBigInt}, number::HeapNumber, @@ -640,10 +642,10 @@ impl Value { } Value::String(data) => { // Skip discriminant hashing in strings - arena[data].as_str().hash(hasher); + arena[data].data.hash(hasher); } Value::SmallString(data) => { - data.as_str().hash(hasher); + Wtf8::from_str(data.as_str()).hash(hasher); } Value::Symbol(data) => { discriminant.hash(hasher); diff --git a/nova_vm/src/engine/bytecode/executable.rs b/nova_vm/src/engine/bytecode/executable.rs index 536e8068f..fa9a141c1 100644 --- a/nova_vm/src/engine/bytecode/executable.rs +++ b/nova_vm/src/engine/bytecode/executable.rs @@ -265,30 +265,46 @@ impl Executable { } pub(super) fn get_instruction(instructions: &[u8], ip: &mut usize) -> Option { - if *ip >= instructions.len() { + let len = instructions.len(); + let cur_ip = *ip; + if cur_ip >= len { return None; } - - let kind: Instruction = unsafe { std::mem::transmute::(instructions[*ip]) }; *ip += 1; - - let mut args: [Option; 2] = [None, None]; - - for item in args.iter_mut().take(kind.argument_count() as usize) { - let length = instructions[*ip..].len(); - if length >= 2 { - let bytes = IndexType::from_ne_bytes(unsafe { - *std::mem::transmute::<*const u8, *const [u8; 2]>(instructions[*ip..].as_ptr()) - }); + let kind: Instruction = unsafe { std::mem::transmute::(instructions[cur_ip]) }; + + let arg_count = kind.argument_count() as usize; + + let cur_ip = *ip; + match arg_count { + 0 => Some(Instr { + kind, + args: [None, None], + }), + 1 => { + let bytes: [u8; 2] = [instructions[cur_ip], instructions[cur_ip + 1]]; + let arg0 = IndexType::from_ne_bytes(bytes); *ip += 2; - *item = Some(bytes); - } else { - *ip += 1; - *item = None; + Some(Instr { + kind, + args: [Some(arg0), None], + }) + } + 2 => { + let bytes: [[u8; 2]; 2] = [ + [instructions[cur_ip], instructions[cur_ip + 1]], + [instructions[cur_ip + 2], instructions[cur_ip + 3]], + ]; + let arg0 = IndexType::from_ne_bytes(bytes[0]); + let arg1 = IndexType::from_ne_bytes(bytes[1]); + *ip += 4; + Some(Instr { + kind, + args: [Some(arg0), Some(arg1)], + }) } + _ => unreachable!(), } - - Some(Instr { kind, args }) } impl Index for Agent { diff --git a/nova_vm/src/engine/bytecode/vm.rs b/nova_vm/src/engine/bytecode/vm.rs index a98058fec..2da403566 100644 --- a/nova_vm/src/engine/bytecode/vm.rs +++ b/nova_vm/src/engine/bytecode/vm.rs @@ -68,6 +68,8 @@ use crate::{ heap::{CompactionLists, HeapMarkAndSweep, WellKnownSymbolIndexes, WorkQueues}, }; +use super::executable::get_instruction; + struct EmptyParametersList(ast::FormalParameters<'static>); unsafe impl Send for EmptyParametersList {} unsafe impl Sync for EmptyParametersList {} @@ -289,7 +291,9 @@ impl<'a> Vm { let do_gc = !agent.options.disable_gc; #[cfg(feature = "interleaved-gc")] let mut instr_count = 0u8; - while let Some(instr) = executable.get_instruction(agent, &mut self.ip) { + + let instructions = executable.get_instructions(agent); + while let Some(instr) = get_instruction(instructions, &mut self.ip) { #[cfg(feature = "interleaved-gc")] if do_gc { instr_count = instr_count.wrapping_add(1); diff --git a/nova_vm/src/heap.rs b/nova_vm/src/heap.rs index 8fddf478a..45de816af 100644 --- a/nova_vm/src/heap.rs +++ b/nova_vm/src/heap.rs @@ -94,7 +94,7 @@ use crate::{ use ahash::AHashMap; use hashbrown::HashTable; pub(crate) use heap_bits::{CompactionLists, HeapMarkAndSweep, WorkQueues}; -use indexes::IntoBaseIndex; +use wtf8::Wtf8; #[derive(Debug)] pub struct Heap { @@ -322,11 +322,13 @@ impl Heap { /// guaranteed to never equal true. pub(crate) unsafe fn alloc_str(&mut self, message: &str) -> String<'static> { let found = self.find_equal_string(message); - if let Some(idx) = found { - return idx; + match found { + Ok(string) => string, + Err(hash) => { + let data = StringHeapData::from_str(message); + self.create((data, hash)) + } } - let data = StringHeapData::from_str(message); - self.create(data) } /// Allocate a static string onto the Agent heap @@ -343,11 +345,13 @@ impl Heap { /// guaranteed to never equal true. unsafe fn alloc_string(&mut self, message: std::string::String) -> String<'static> { let found = self.find_equal_string(message.as_str()); - if let Some(idx) = found { - return idx; + match found { + Ok(string) => string, + Err(hash) => { + let data = StringHeapData::from_string(message); + self.create((data, hash)) + } } - let data = StringHeapData::from_string(message); - self.create(data) } /// Allocate a static string onto the Agent heap @@ -364,24 +368,30 @@ impl Heap { /// guaranteed to never equal true. pub(crate) unsafe fn alloc_static_str(&mut self, message: &'static str) -> String<'static> { let found = self.find_equal_string(message); - if let Some(idx) = found { - return idx; + match found { + Ok(string) => string, + Err(hash) => { + let data = StringHeapData::from_static_str(message); + self.create((data, hash)) + } } - let data = StringHeapData::from_static_str(message); - self.create(data) } - fn find_equal_string(&self, message: &str) -> Option> { + /// Find existing heap String or return the strings hash. + fn find_equal_string(&self, message: &str) -> Result, u64> { debug_assert!(message.len() > 7); + let message = Wtf8::from_str(message); let hash = self.string_hasher.hash_one(message); self.string_lookup_table .find(hash, |heap_string| { - let heap_str = self.strings[heap_string.into_base_index().into_index()] + let heap_str = self.strings[heap_string.get_index()] .as_ref() - .map(|string| string.as_str()); - heap_str == Some(message) + .unwrap() + .as_wtf8(); + heap_str == message }) .map(|&heap_string| heap_string.into()) + .ok_or(hash) } /// Allocate a 64-bit floating point number onto the Agent heap