diff --git a/compiler+runtime/CMakeLists.txt b/compiler+runtime/CMakeLists.txt index 2514f630b..c2f239b8c 100644 --- a/compiler+runtime/CMakeLists.txt +++ b/compiler+runtime/CMakeLists.txt @@ -167,6 +167,7 @@ add_library( src/cpp/jank/runtime/obj/multi_function.cpp src/cpp/jank/runtime/obj/symbol.cpp src/cpp/jank/runtime/obj/keyword.cpp + src/cpp/jank/runtime/obj/character.cpp src/cpp/jank/runtime/obj/persistent_list.cpp src/cpp/jank/runtime/obj/persistent_vector.cpp src/cpp/jank/runtime/obj/persistent_vector_sequence.cpp diff --git a/compiler+runtime/include/cpp/jank/native_persistent_string.hpp b/compiler+runtime/include/cpp/jank/native_persistent_string.hpp index 31117994d..b0930f008 100644 --- a/compiler+runtime/include/cpp/jank/native_persistent_string.hpp +++ b/compiler+runtime/include/cpp/jank/native_persistent_string.hpp @@ -90,14 +90,13 @@ namespace jank } } - [[gnu::nonnull(2)]] - constexpr native_persistent_string(const_pointer_type const s) noexcept + [[gnu::nonnull(2)]] constexpr native_persistent_string(const_pointer_type const s) noexcept : native_persistent_string{ s, traits_type::length(s) } { } - [[gnu::nonnull(2)]] - constexpr native_persistent_string(const_pointer_type const s, size_type const size) noexcept + [[gnu::nonnull(2)]] constexpr native_persistent_string(const_pointer_type const s, + size_type const size) noexcept { if(size <= max_small_size) { @@ -109,11 +108,10 @@ namespace jank } } - [[gnu::nonnull(2, 4)]] - constexpr native_persistent_string(const_pointer_type const lhs, - size_type const lhs_size, - const_pointer_type const rhs, - size_type const rhs_size) noexcept + [[gnu::nonnull(2, 4)]] constexpr native_persistent_string(const_pointer_type const lhs, + size_type const lhs_size, + const_pointer_type const rhs, + size_type const rhs_size) noexcept { auto const combined_size(lhs_size + rhs_size); if(combined_size <= max_small_size) @@ -232,7 +230,8 @@ namespace jank } [[gnu::const]] - constexpr value_type operator[](size_t const index) const noexcept + constexpr value_type + operator[](size_t const index) const noexcept { return data()[index]; } @@ -481,27 +480,31 @@ namespace jank /*** Comparisons. ***/ [[gnu::const]] - constexpr native_bool operator!=(native_persistent_string const &s) const noexcept + constexpr native_bool + operator!=(native_persistent_string const &s) const noexcept { auto const length(size()); return length != s.size() || traits_type::compare(data(), s.data(), length); } [[gnu::const]] - constexpr native_bool operator==(native_persistent_string const &s) const noexcept + constexpr native_bool + operator==(native_persistent_string const &s) const noexcept { return !(*this != s); } [[gnu::const, gnu::nonnull(2)]] - constexpr native_bool operator!=(const_pointer_type const s) const noexcept + constexpr native_bool + operator!=(const_pointer_type const s) const noexcept { auto const length(traits_type::length(s)); return size() != length || traits_type::compare(data(), s, length); } [[gnu::const, gnu::nonnull(2)]] - constexpr native_bool operator==(const_pointer_type const s) const noexcept + constexpr native_bool + operator==(const_pointer_type const s) const noexcept { return !(*this != s); } diff --git a/compiler+runtime/include/cpp/jank/read/lex.hpp b/compiler+runtime/include/cpp/jank/read/lex.hpp index 53f7e06d2..d4678baba 100644 --- a/compiler+runtime/include/cpp/jank/read/lex.hpp +++ b/compiler+runtime/include/cpp/jank/read/lex.hpp @@ -33,6 +33,8 @@ namespace jank::read::lex /* Has bool data. */ boolean, /* Has string data. */ + character, + /* Has string data. */ symbol, /* Has string data. */ keyword, diff --git a/compiler+runtime/include/cpp/jank/read/parse.hpp b/compiler+runtime/include/cpp/jank/read/parse.hpp index 827d6555b..0a2809e84 100644 --- a/compiler+runtime/include/cpp/jank/read/parse.hpp +++ b/compiler+runtime/include/cpp/jank/read/parse.hpp @@ -57,6 +57,7 @@ namespace jank::read::parse object_result parse_vector(); object_result parse_map(); object_result parse_quote(); + object_result parse_character(); object_result parse_meta_hint(); object_result parse_reader_macro(); object_result parse_reader_macro_set(); diff --git a/compiler+runtime/include/cpp/jank/runtime/behavior/callable.hpp b/compiler+runtime/include/cpp/jank/runtime/behavior/callable.hpp index 72cb1b93e..5f880371d 100644 --- a/compiler+runtime/include/cpp/jank/runtime/behavior/callable.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/behavior/callable.hpp @@ -200,9 +200,15 @@ namespace jank::runtime /* TODO: Is this needed? A non-callable function-like would need to define all call overloads? :( */ template concept function_like = requires(T * const t) { - { t->call(object_ptr{}) } -> std::convertible_to; - { t->call(object_ptr{}, object_ptr{}) } -> std::convertible_to; - { t->call(object_ptr{}, object_ptr{}, object_ptr{}) } -> std::convertible_to; + { + t->call(object_ptr{}) + } -> std::convertible_to; + { + t->call(object_ptr{}, object_ptr{}) + } -> std::convertible_to; + { + t->call(object_ptr{}, object_ptr{}, object_ptr{}) + } -> std::convertible_to; { t->call(object_ptr{}, object_ptr{}, object_ptr{}, object_ptr{}) } -> std::convertible_to; @@ -243,7 +249,9 @@ namespace jank::runtime object_ptr{}) } -> std::convertible_to; - { t->get_arity_flags() } -> std::convertible_to; + { + t->get_arity_flags() + } -> std::convertible_to; }; } } diff --git a/compiler+runtime/include/cpp/jank/runtime/detail/object_util.hpp b/compiler+runtime/include/cpp/jank/runtime/detail/object_util.hpp index 72fb79b8f..ffacb18a9 100644 --- a/compiler+runtime/include/cpp/jank/runtime/detail/object_util.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/detail/object_util.hpp @@ -37,6 +37,7 @@ namespace jank using persistent_string = static_object; using persistent_list = static_object; using symbol = static_object; + using character = static_object; } /* TODO: Constexpr these. */ @@ -65,6 +66,12 @@ namespace jank return make_box(i); } + [[gnu::always_inline, gnu::flatten, gnu::hot]] + inline auto make_box(char const i) + { + return make_box(i); + } + [[gnu::always_inline, gnu::flatten, gnu::hot]] inline auto make_box(size_t const i) { diff --git a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp index a65effd6d..50f3371a1 100644 --- a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -172,6 +173,11 @@ namespace jank::runtime return fn(expect_object(erased), std::forward(args)...); } break; + case object_type::character: + { + return fn(expect_object(erased), std::forward(args)...); + } + break; case object_type::persistent_vector: { return fn(expect_object(erased), std::forward(args)...); diff --git a/compiler+runtime/include/cpp/jank/runtime/obj/character.hpp b/compiler+runtime/include/cpp/jank/runtime/obj/character.hpp new file mode 100644 index 000000000..54472c54b --- /dev/null +++ b/compiler+runtime/include/cpp/jank/runtime/obj/character.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include + +namespace jank::runtime +{ + template <> + struct static_object : gc + { + static constexpr native_bool pointer_free{ false }; + + static_object() = default; + static_object(static_object &&) = default; + static_object(static_object const &) = default; + static_object(native_persistent_string const &); + static_object(char); + + /* behavior::objectable */ + native_bool equal(object const &) const; + native_persistent_string const &to_string() const; + void to_string(fmt::memory_buffer &buff) const; + native_hash to_hash() const; + + object base{ object_type::character }; + + /* Holds the litereal form of the character as it's written eg. "\\tab" */ + native_persistent_string data; + }; + + namespace obj + { + using character = static_object; + using character_ptr = native_box; + } +} diff --git a/compiler+runtime/include/cpp/jank/runtime/object.hpp b/compiler+runtime/include/cpp/jank/runtime/object.hpp index 54cbe4020..fc2f3e1bd 100644 --- a/compiler+runtime/include/cpp/jank/runtime/object.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/object.hpp @@ -16,6 +16,7 @@ namespace jank::runtime persistent_string, keyword, symbol, + character, persistent_list, persistent_vector, persistent_array_map, @@ -69,11 +70,21 @@ namespace jank::runtime { template concept objectable = requires(T * const t) { - { t->equal(std::declval()) } -> std::convertible_to; - { t->to_string() } -> std::convertible_to; - { t->to_string(std::declval()) } -> std::same_as; - { t->to_hash() } -> std::convertible_to; - { t->base } -> std::same_as; + { + t->equal(std::declval()) + } -> std::convertible_to; + { + t->to_string() + } -> std::convertible_to; + { + t->to_string(std::declval()) + } -> std::same_as; + { + t->to_hash() + } -> std::convertible_to; + { + t->base + } -> std::same_as; }; } } diff --git a/compiler+runtime/src/cpp/jank/analyze/processor.cpp b/compiler+runtime/src/cpp/jank/analyze/processor.cpp index 11a01c423..76342110a 100644 --- a/compiler+runtime/src/cpp/jank/analyze/processor.cpp +++ b/compiler+runtime/src/cpp/jank/analyze/processor.cpp @@ -6,6 +6,7 @@ #include +#include #include #include #include @@ -32,18 +33,18 @@ namespace jank::analyze }; }; specials = { - { make_box("def"), make_fn(&processor::analyze_def) }, - { make_box("fn*"), make_fn(&processor::analyze_fn) }, - { make_box("recur"), make_fn(&processor::analyze_recur) }, - { make_box("do"), make_fn(&processor::analyze_do) }, - { make_box("let*"), make_fn(&processor::analyze_let) }, - { make_box("loop*"), make_fn(&processor::analyze_loop) }, - { make_box("if"), make_fn(&processor::analyze_if) }, - { make_box("quote"), make_fn(&processor::analyze_quote) }, - { make_box("var"), make_fn(&processor::analyze_var_call) }, - { make_box("throw"), make_fn(&processor::analyze_throw) }, - { make_box("try"), make_fn(&processor::analyze_try) }, - { make_box("native/raw"), make_fn(&processor::analyze_native_raw) }, + { make_box("def"), make_fn(&processor::analyze_def)}, + { make_box("fn*"), make_fn(&processor::analyze_fn)}, + { make_box("recur"), make_fn(&processor::analyze_recur)}, + { make_box("do"), make_fn(&processor::analyze_do)}, + { make_box("let*"), make_fn(&processor::analyze_let)}, + { make_box("loop*"), make_fn(&processor::analyze_loop)}, + { make_box("if"), make_fn(&processor::analyze_if)}, + { make_box("quote"), make_fn(&processor::analyze_quote)}, + { make_box("var"), make_fn(&processor::analyze_var_call)}, + { make_box("throw"), make_fn(&processor::analyze_throw)}, + { make_box("try"), make_fn(&processor::analyze_try)}, + {make_box("native/raw"), make_fn(&processor::analyze_native_raw)}, }; } @@ -153,7 +154,7 @@ namespace jank::analyze } return make_box(expr::def{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, qualified_sym, value_expr }); @@ -205,7 +206,7 @@ namespace jank::analyze } return make_box(expr::local_reference{ - expression_base{ {}, expr_type, current_frame, needs_box }, + expression_base{{}, expr_type, current_frame, needs_box}, sym, unwrapped_local.binding }); @@ -227,7 +228,7 @@ namespace jank::analyze current_frame->lift_var(qualified_sym); } return make_box(expr::var_deref{ - expression_base{ {}, expr_type, current_frame }, + expression_base{{}, expr_type, current_frame}, qualified_sym, unwrapped_var }); @@ -325,7 +326,7 @@ namespace jank::analyze fn_ctx->is_variadic = is_variadic; fn_ctx->param_count = param_symbols.size(); expr::do_ body_do{ - expression_base{ {}, expression_type::return_statement, frame } + expression_base{{}, expression_type::return_statement, frame} }; size_t const form_count{ list->count() - 1 }; size_t i{}; @@ -350,10 +351,10 @@ namespace jank::analyze } return { - expr::function_arity{ std::move(param_symbols), + expr::function_arity{std::move(param_symbols), std::move(body_do), std::move(frame), - std::move(fn_ctx) } + std::move(fn_ctx)} }; } @@ -502,7 +503,7 @@ namespace jank::analyze .to_string())))); auto ret(make_box(expr::function{ - expression_base{ {}, expr_type, current_frame }, + expression_base{{}, expr_type, current_frame}, name, unique_name, std::move(arities), @@ -577,7 +578,7 @@ namespace jank::analyze fn_ctx.unwrap()->is_tail_recursive = true; return make_box(expr::recur{ - expression_base{ {}, expr_type, current_frame }, + expression_base{{}, expr_type, current_frame}, make_box(list->data.rest()), arg_exprs }); @@ -864,7 +865,7 @@ namespace jank::analyze } return make_box(expr::if_{ - expression_base{ {}, expr_type, current_frame, needs_box }, + expression_base{{}, expr_type, current_frame, needs_box}, condition_expr.expect_ok(), then_expr.expect_ok(), else_expr_opt @@ -918,7 +919,7 @@ namespace jank::analyze } return make_box(expr::var_ref{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, qualified_sym, found_var.unwrap() }); @@ -934,7 +935,7 @@ namespace jank::analyze auto const qualified_sym( current_frame->lift_var(make_box(o->n->name->name, o->name->name))); return make_box(expr::var_ref{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, qualified_sym, o }); @@ -960,7 +961,7 @@ namespace jank::analyze } return make_box(expr::throw_{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, arg_expr.unwrap_move() }); } @@ -973,7 +974,7 @@ namespace jank::analyze native_bool const) { expr::try_ ret{ - expression_base{ {}, expr_type, current_frame } + expression_base{{}, expr_type, current_frame} }; /* Clojure JVM doesn't support recur across try/catch/finally, so we don't either. */ @@ -1165,8 +1166,8 @@ namespace jank::analyze /* Once we've found the start of an interpolation, we begin lexing/parsing at that * spot, so we can get a jank value. */ read::lex::processor l_prc{ - { code_str->data.data() + next_interp + interp_start.size(), - code_str->data.data() + code_str->data.size() } + {code_str->data.data() + next_interp + interp_start.size(), + code_str->data.data() + code_str->data.size()} }; read::parse::processor p_prc{ l_prc.begin(), l_prc.end() }; auto parsed_obj(p_prc.next()); @@ -1216,7 +1217,7 @@ namespace jank::analyze } return make_box(expr::native_raw{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, std::move(chunks) }); } @@ -1230,7 +1231,7 @@ namespace jank::analyze { current_frame->lift_constant(o); return make_box(expr::primitive_literal{ - expression_base{ {}, expr_type, current_frame, needs_box }, + expression_base{{}, expr_type, current_frame, needs_box}, o }); } @@ -1264,7 +1265,7 @@ namespace jank::analyze { /* Eval the literal to resolve exprs such as quotes. */ auto const pre_eval_expr(make_box(expr::vector{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, std::move(exprs), o->meta })); @@ -1274,13 +1275,13 @@ namespace jank::analyze current_frame->lift_constant(o); return make_box(expr::primitive_literal{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, o }); } return make_box(expr::vector{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, std::move(exprs), o->meta }); @@ -1313,7 +1314,7 @@ namespace jank::analyze /* TODO: Uniqueness check. */ return make_box(expr::map{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, std::move(exprs), o->meta }); @@ -1347,7 +1348,7 @@ namespace jank::analyze { /* Eval the literal to resolve exprs such as quotes. */ auto const pre_eval_expr(make_box(expr::set{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, std::move(exprs), o->meta })); @@ -1357,13 +1358,13 @@ namespace jank::analyze current_frame->lift_constant(o); return make_box(expr::primitive_literal{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, o }); } return make_box(expr::set{ - expression_base{ {}, expr_type, current_frame, true }, + expression_base{{}, expr_type, current_frame, true}, std::move(exprs), o->meta }); @@ -1488,7 +1489,7 @@ namespace jank::analyze } return make_box(expr::call{ - expression_base{ {}, expr_type, current_frame, needs_ret_box }, + expression_base{{}, expr_type, current_frame, needs_ret_box}, source, make_box(o->data.rest()), arg_exprs, @@ -1536,7 +1537,8 @@ namespace jank::analyze else if constexpr(runtime::behavior::numberable || std::same_as || std::same_as || std::same_as - || std::same_as) + || std::same_as + || std::same_as) { return analyze_primitive_literal(o, current_frame, expr_type, fn_ctx, needs_box); } diff --git a/compiler+runtime/src/cpp/jank/codegen/processor.cpp b/compiler+runtime/src/cpp/jank/codegen/processor.cpp index e4aa8aa48..25359678b 100644 --- a/compiler+runtime/src/cpp/jank/codegen/processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/processor.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -79,6 +80,14 @@ namespace jank::codegen } return "jank::native_integer"; } + case jank::runtime::object_type::character: + { + if(boxed) + { + return "jank::runtime::obj::character_ptr"; + } + return "jank::runtime::obj::character"; + } case jank::runtime::object_type::real: { if(boxed) @@ -186,6 +195,13 @@ namespace jank::codegen typed_o->name); } } + else if constexpr(std::same_as) + { + fmt::format_to( + inserter, + R"(jank::make_box({}))", + util::escaped_quoted_view(typed_o->data)); + } else if constexpr(std::same_as) { fmt::format_to( diff --git a/compiler+runtime/src/cpp/jank/evaluate.cpp b/compiler+runtime/src/cpp/jank/evaluate.cpp index a8e1c8559..c8f3e6f65 100644 --- a/compiler+runtime/src/cpp/jank/evaluate.cpp +++ b/compiler+runtime/src/cpp/jank/evaluate.cpp @@ -52,10 +52,10 @@ namespace jank::evaluate if(exprs.empty()) { return wrap_expression(analyze::expr::primitive_literal{ - analyze::expression_base{ {}, + analyze::expression_base{{}, analyze::expression_type::return_statement, an_prc.root_frame, - true }, + true}, runtime::obj::nil::nil_const() }); } diff --git a/compiler+runtime/src/cpp/jank/jit/processor.cpp b/compiler+runtime/src/cpp/jank/jit/processor.cpp index e406c9682..32204b1e6 100644 --- a/compiler+runtime/src/cpp/jank/jit/processor.cpp +++ b/compiler+runtime/src/cpp/jank/jit/processor.cpp @@ -136,7 +136,7 @@ namespace jank::jit profile::timer timer{ "jit eval" }; /* TODO: Improve Cling to accept string_views instead. */ auto const str(cg_prc.declaration_str()); - //fmt::println("{}", str); + // fmt::println("{}", str); interpreter->declare(static_cast(cg_prc.declaration_str())); diff --git a/compiler+runtime/src/cpp/jank/read/lex.cpp b/compiler+runtime/src/cpp/jank/read/lex.cpp index ee6314e28..adb9d17de 100644 --- a/compiler+runtime/src/cpp/jank/read/lex.cpp +++ b/compiler+runtime/src/cpp/jank/read/lex.cpp @@ -289,6 +289,42 @@ namespace jank::read case '\'': require_space = false; return ok(token{ pos++, token_kind::single_quote }); + case '\\': + { + require_space = false; + + auto const ch(peek()); + pos++; + if(ch.is_none() || std::isspace(ch.unwrap())) + { + return err(error{ token_start, "Expecting a valid character literal after \\" }); + } + + while(true) + { + auto const pt(peek()); + if(pt.is_none() || !is_symbol_char(pt.unwrap())) + { + break; + } + pos++; + } + + native_persistent_string_view const data{ file.data() + token_start, + ++pos - token_start }; + + if(data.size() == 2 || data == "\\newline" || data == "\\backspace" || data == "\\space" + || data == "\\formfeed" || data == "\\return" || data == "\\tab") + { + return ok(token{ token_start, pos - token_start, token_kind::character, data }); + } + + return err(error{ token_start, + pos - token_start, + fmt::format("Invalid character literal `{}` \nNote: Jank " + "doesn't support unicode characters yet!", + data) }); + } case ';': { size_t leading_semis{ 1 }; @@ -456,9 +492,16 @@ namespace jank::read return err(std::move(e.unwrap())); } - /* Support auto-resolved qualified keywords. */ auto const oc(peek()); - if(oc.is_some() && oc.unwrap() == ':') + if(oc.is_none() || std::isspace(oc.unwrap())) + { + ++pos; + return err( + error{ token_start, "invalid keyword: expected non-whitespace character after :" }); + } + + /* Support auto-resolved qualified keywords. */ + if(oc.unwrap() == ':') { ++pos; } diff --git a/compiler+runtime/src/cpp/jank/read/parse.cpp b/compiler+runtime/src/cpp/jank/read/parse.cpp index be811d97f..51ff0da67 100644 --- a/compiler+runtime/src/cpp/jank/read/parse.cpp +++ b/compiler+runtime/src/cpp/jank/read/parse.cpp @@ -1,6 +1,3 @@ -#include -#include - #include #include @@ -124,6 +121,8 @@ namespace jank::read::parse return ok(none); case lex::token_kind::single_quote: return parse_quote(); + case lex::token_kind::character: + return parse_character(); case lex::token_kind::meta_hint: return parse_meta_hint(); case lex::token_kind::reader_macro: @@ -339,6 +338,15 @@ namespace jank::read::parse latest_token }; } + processor::object_result processor::parse_character() + { + auto const token((*token_current).expect_ok()); + ++token_current; + auto const sv(boost::get(token.data)); + + return object_source_info{ make_box(sv), token, token }; + } + processor::object_result processor::parse_meta_hint() { auto const start_token(token_current.latest.unwrap().expect_ok()); diff --git a/compiler+runtime/src/cpp/jank/runtime/obj/character.cpp b/compiler+runtime/src/cpp/jank/runtime/obj/character.cpp new file mode 100644 index 000000000..ce0edd223 --- /dev/null +++ b/compiler+runtime/src/cpp/jank/runtime/obj/character.cpp @@ -0,0 +1,95 @@ +#include + +namespace jank::runtime +{ + static option get_char_from_repr(native_persistent_string const &sv) + { + if(sv.size() == 2) + { + return sv[1]; + } + else if(sv == "\\newline") + { + return '\n'; + } + else if(sv == "\\space") + { + return ' '; + } + else if(sv == "\\tab") + { + return '\t'; + } + else if(sv == "\\backspace") + { + return '\b'; + } + else if(sv == "\\formfeed") + { + return '\f'; + } + else if(sv == "\\return") + { + return '\r'; + } + + return none; + } + + static native_persistent_string get_repr_from_char(char const ch) + { + switch(ch) + { + case '\n': + return "\\newline"; + case ' ': + return "\\space"; + case '\t': + return "\\tab"; + case '\b': + return "\\backspace"; + case '\f': + return "\\formfeed"; + case '\r': + return "\\return"; + default: + return fmt::format("\\{}", ch); + } + } + + obj::character::static_object(native_persistent_string const &d) + : data{ d } + { + } + + obj::character::static_object(char const ch) + : data{ get_repr_from_char(ch) } + { + } + + native_bool obj::character::equal(object const &o) const + { + if(o.type != object_type::character) + { + return false; + } + + auto const c(expect_object(&o)); + return data == c->data; + } + + void obj::character::to_string(fmt::memory_buffer &buff) const + { + fmt::format_to(std::back_inserter(buff), "{}", data); + } + + native_persistent_string const &obj::character::to_string() const + { + return data; + } + + native_hash obj::character::to_hash() const + { + return static_cast(get_char_from_repr(data).unwrap()); + } +} diff --git a/compiler+runtime/src/cpp/jank/runtime/util.cpp b/compiler+runtime/src/cpp/jank/runtime/util.cpp index 2a9d3de6a..74bcf844a 100644 --- a/compiler+runtime/src/cpp/jank/runtime/util.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/util.cpp @@ -51,59 +51,59 @@ namespace jank::runtime } static native_unordered_map const munge_chars{ - { '-', "_" }, - { ':', "_COLON_" }, - { '+', "_PLUS_" }, - { '>', "_GT_" }, - { '<', "_LT_" }, - { '=', "_EQ_" }, - { '~', "_TILDE_" }, - { '!', "_BANG_" }, - { '@', "_CIRCA_" }, - { '#', "_SHARP_" }, - { '\'', "_SQUOTE_" }, - { '"', "_DQUOTE_" }, - { '%', "_PERC_" }, - { '^', "_CARET_" }, - { '&', "_AMP_" }, - { '*', "_STAR_" }, - { '|', "_BAR_" }, - { '{', "_LBRACE_" }, - { '}', "_RBRACE_" }, - { '[', "_LBRACK_" }, - { ']', "_RBRACK_" }, - { '/', "_SLASH_" }, - { '\\', "_BSLASH_" }, - { '?', "_QMARK_" } + { '-', "_"}, + { ':', "_COLON_"}, + { '+', "_PLUS_"}, + { '>', "_GT_"}, + { '<', "_LT_"}, + { '=', "_EQ_"}, + { '~', "_TILDE_"}, + { '!', "_BANG_"}, + { '@', "_CIRCA_"}, + { '#', "_SHARP_"}, + {'\'', "_SQUOTE_"}, + { '"', "_DQUOTE_"}, + { '%', "_PERC_"}, + { '^', "_CARET_"}, + { '&', "_AMP_"}, + { '*', "_STAR_"}, + { '|', "_BAR_"}, + { '{', "_LBRACE_"}, + { '}', "_RBRACE_"}, + { '[', "_LBRACK_"}, + { ']', "_RBRACK_"}, + { '/', "_SLASH_"}, + {'\\', "_BSLASH_"}, + { '?', "_QMARK_"} }; /* This is sorted from longest to shortest so we can check for the longest first. This * allows some entries to be prefixes of others without ambiguity. */ static native_vector> const demunge_chars{ - { "_LBRACE_", '{' }, - { "_RBRACE_", '}' }, - { "_LBRACK_", '[' }, - { "_RBRACK_", ']' }, - { "_BSLASH_", '\\' }, - { "_SQUOTE_", '\'' }, - { "_DQUOTE_", '"' }, - { "_QMARK_", '?' }, - { "_COLON_", ':' }, - { "_TILDE_", '~' }, - { "_CIRCA_", '@' }, - { "_SHARP_", '#' }, - { "_CARET_", '^' }, - { "_SLASH_", '/' }, - { "_PERC_", '%' }, - { "_PLUS_", '+' }, - { "_BANG_", '!' }, - { "_STAR_", '*' }, - { "_AMP_", '&' }, - { "_BAR_", '|' }, - { "_GT_", '>' }, - { "_LT_", '<' }, - { "_EQ_", '=' }, - { "_", '-' }, + {"_LBRACE_", '{'}, + {"_RBRACE_", '}'}, + {"_LBRACK_", '['}, + {"_RBRACK_", ']'}, + {"_BSLASH_", '\\'}, + {"_SQUOTE_", '\''}, + {"_DQUOTE_", '"'}, + { "_QMARK_", '?'}, + { "_COLON_", ':'}, + { "_TILDE_", '~'}, + { "_CIRCA_", '@'}, + { "_SHARP_", '#'}, + { "_CARET_", '^'}, + { "_SLASH_", '/'}, + { "_PERC_", '%'}, + { "_PLUS_", '+'}, + { "_BANG_", '!'}, + { "_STAR_", '*'}, + { "_AMP_", '&'}, + { "_BAR_", '|'}, + { "_GT_", '>'}, + { "_LT_", '<'}, + { "_EQ_", '='}, + { "_", '-'}, }; /* https://en.cppreference.com/w/cpp/keyword */ diff --git a/compiler+runtime/test/cpp/jank/read/lex.cpp b/compiler+runtime/test/cpp/jank/read/lex.cpp index c86b15a8c..168682641 100644 --- a/compiler+runtime/test/cpp/jank/read/lex.cpp +++ b/compiler+runtime/test/cpp/jank/read/lex.cpp @@ -582,6 +582,92 @@ namespace jank::read::lex } } + TEST_CASE("Character") + { + SUBCASE("Whitespace after \\") + { + processor p{ R"(\ )" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ { error(0, "Expecting a valid character literal after \\") } })); + } + + SUBCASE("Dangling \\") + { + processor p{ R"(\)" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ { error(0, "Expecting a valid character literal after \\") } })); + } + + SUBCASE("Alphabetic") + { + processor p{ R"(\a)" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_tokens({ + {0, 2, token_kind::character, "\\a"sv} + })); + } + + SUBCASE("Numeric") + { + processor p{ R"(\1)" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_tokens({ + {0, 2, token_kind::character, "\\1"sv} + })); + } + + SUBCASE("Multiple characters after \\") + { + processor p{ R"(\11)" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ { error(0, + 3, + "Invalid character literal `\\11` \nNote: Jank " + "doesn't support unicode characters yet!"sv) } })); + } + + SUBCASE("Invalid symbol after a valid char") + { + processor p{ R"(\1:)" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + token{ 0, 2, token_kind::character, "\\1"sv }, + error{ 2, "invalid keyword: expected non-whitespace character after :" } + })); + } + + SUBCASE("Valid consecutive characters") + { + processor p{ R"(\1 \newline\' \\)" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_tokens({ + { 0, 2, token_kind::character, "\\1"sv}, + { 3, 8, token_kind::character, "\\newline"sv}, + {11, 2, token_kind::character, "\\'"sv}, + {14, 2, token_kind::character, "\\\\"sv} + })); + } + + SUBCASE("Character followed by a backticked keyword") + { + processor p{ R"(\a`:kw)" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + token{ 0, 2, token_kind::character, "\\a"sv }, + token{ 2, token_kind::syntax_quote }, + token{ 3, 3, token_kind::keyword, "kw"sv } + })); + } + } + TEST_CASE("Symbol") { SUBCASE("Single-char") @@ -708,6 +794,16 @@ namespace jank::read::lex })); } + SUBCASE("Whitespace after :") + { + processor p{ ": " }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + error{0, "invalid keyword: expected non-whitespace character after :"} + })); + } + SUBCASE("Single slash") { processor p{ ":/" }; diff --git a/compiler+runtime/test/cpp/jank/read/parse.cpp b/compiler+runtime/test/cpp/jank/read/parse.cpp index ed16419c7..96c0bd2cf 100644 --- a/compiler+runtime/test/cpp/jank/read/parse.cpp +++ b/compiler+runtime/test/cpp/jank/read/parse.cpp @@ -86,6 +86,74 @@ namespace jank::read::parse CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); } + TEST_CASE("Character") + { + SUBCASE("Single") + { + lex::processor lp{ R"(\a\1\`\:\#)" }; + processor p{ lp.begin(), lp.end() }; + + size_t offset{}; + for(native_persistent_string const &ch : { "\\a", "\\1", "\\`", "\\:", "\\#" }) + { + auto const r(p.next()); + CHECK(runtime::detail::equal(r.expect_ok().unwrap().ptr, + make_box(ch))); + + CHECK(r.expect_ok().unwrap().start + == lex::token{ offset, 2, lex::token_kind::character, ch }); + CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); + + /* Current character and then a backslash */ + offset += 2; + } + } + + SUBCASE("Special") + { + lex::processor lp{ R"(\newline \backspace \return \formfeed \tab \space)" }; + processor p{ lp.begin(), lp.end() }; + + size_t offset{}; + for(native_persistent_string const &ch : + { "\\newline", "\\backspace", "\\return", "\\formfeed", "\\tab", "\\space" }) + { + auto const r(p.next()); + CHECK(runtime::detail::equal(r.expect_ok().unwrap().ptr, + make_box(ch))); + + auto const len(ch.size()); + CHECK(r.expect_ok().unwrap().start + == lex::token{ offset, len, lex::token_kind::character, ch }); + CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); + + /* +1 for space */ + offset += len + 1; + } + } + + SUBCASE("Special and single") + { + lex::processor lp{ R"(\newline\a\tab\`\space)" }; + processor p{ lp.begin(), lp.end() }; + + size_t offset{}; + for(native_persistent_string const &ch : { "\\newline", "\\a", "\\tab", "\\`", "\\space" }) + { + auto const r(p.next()); + CHECK(runtime::detail::equal(r.expect_ok().unwrap().ptr, + make_box(ch))); + + auto const len(ch.size()); + CHECK(r.expect_ok().unwrap().start + == lex::token{ offset, len, lex::token_kind::character, ch }); + CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); + + offset += len; + } + } + } + TEST_CASE("String") { SUBCASE("Unescaped")