Skip to content

Commit

Permalink
Add token info to parse results
Browse files Browse the repository at this point in the history
We're not using meta here, since not every object has meta. We only
include the start/end of each parsed item, though, so a list of numbers
will only have the ( and ) tokens, not tokens for each number. I'm not
sure how we'd handle that if we need it, but I don't need it yet anyway.
  • Loading branch information
jeaye committed Mar 10, 2024
1 parent 04dfd13 commit 9a134d4
Show file tree
Hide file tree
Showing 9 changed files with 356 additions and 95 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ add_library(
src/cpp/jank/util/cli.cpp
src/cpp/jank/util/mapped_file.cpp
src/cpp/jank/util/scope_exit.cpp
src/cpp/jank/util/escape.cpp
src/cpp/jank/profile/time.cpp
src/cpp/jank/read/lex.cpp
src/cpp/jank/read/parse.cpp
Expand Down
2 changes: 2 additions & 0 deletions include/cpp/jank/read/lex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ namespace jank::read::lex

struct token
{
token() = default;
token(token_kind const k);
token(size_t const p, token_kind const k);
token(size_t const p, token_kind const k, native_integer const);
Expand All @@ -51,6 +52,7 @@ namespace jank::read::lex
token(size_t const p, size_t const s, token_kind const k, native_integer const);
token(size_t const p, size_t const s, token_kind const k, native_real const);
token(size_t const p, size_t const s, token_kind const k, native_persistent_string_view const);
token(size_t const p, size_t const s, token_kind const k, char const * const);
token(size_t const p, size_t const s, token_kind const k, native_bool const);

native_bool operator==(token const &rhs) const;
Expand Down
2 changes: 2 additions & 0 deletions include/cpp/jank/read/parse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace jank::read::parse
native_bool operator!=(object_source_info const &rhs) const;

runtime::object_ptr ptr{};
lex::token start, end;
};

using object_result = result<option<object_source_info>, error>;
Expand Down Expand Up @@ -67,6 +68,7 @@ namespace jank::read::parse
runtime::context &rt_ctx;
lex::processor::iterator token_current, token_end;
option<lex::token_kind> expected_closer;
lex::token latest_token;
/* Whether or not the next form is considered quoted. */
native_bool quoted{};
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#pragma once

/* This provides a fmt extension for escaping strings and wrapping them in
* quotes. It's largely adapted from here:
* https://github.com/fmtlib/fmt/issues/825#issuecomment-1227501168 */
namespace jank::codegen
namespace jank::util
{
/* This provides a fmt extension for escaping strings and wrapping them in
* quotes. It's largely adapted from here:
* https://github.com/fmtlib/fmt/issues/825#issuecomment-1227501168
*
* Usage just looks like:
* fmt::format("{}", util::escaped_quoted_view(s))
*/
template <typename S = native_persistent_string_view>
struct escape_view
{
Expand Down Expand Up @@ -50,16 +54,22 @@ namespace jank::codegen
};

/* Builds an escape_view over `sv`, passing `q` and `e` through unchanged.
 * `q` defaults to a double quote and `e` to a backslash.
 * NOTE(review): presumably `q` is the quote character wrapped around the
 * output and `e` the character used to escape; escape_view's body is not
 * visible here, so confirm against it. */
constexpr escape_view<native_persistent_string_view>
escaped(native_persistent_string_view const &sv, char const q = '"', char const e = '\\')
escaped_quoted_view(native_persistent_string_view const &sv,
char const q = '"',
char const e = '\\')
{
return escape_view<native_persistent_string_view>{ sv, q, e };
}

/* These provide normal escaping/unescaping, with no quoting.
 *
 * Both take their input by const reference and return a new
 * native_transient_string. unescape returns a string_result, so it can
 * report a failure to the caller; escape returns the string directly and
 * has no error channel.
 * NOTE(review): the set of escape sequences handled is defined in the
 * implementation file, which is not shown here. */
string_result<native_transient_string> unescape(native_transient_string const &input);
native_transient_string escape(native_transient_string const &input);
}

template <typename S>
struct fmt::formatter<jank::codegen::escape_view<S>>
struct fmt::formatter<jank::util::escape_view<S>>
{
using V = jank::codegen::escape_view<S>;
using V = jank::util::escape_view<S>;

template <typename C>
constexpr auto parse(C &ctx)
Expand All @@ -68,7 +78,7 @@ struct fmt::formatter<jank::codegen::escape_view<S>>
}

template <typename C>
auto format(jank::codegen::escape_view<S> const &s, C &ctx)
auto format(jank::util::escape_view<S> const &s, C &ctx)
{
return s.copy(ctx.out());
}
Expand Down
4 changes: 2 additions & 2 deletions src/cpp/jank/codegen/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <jank/runtime/obj/number.hpp>
#include <jank/runtime/util.hpp>
#include <jank/codegen/processor.hpp>
#include <jank/codegen/escape.hpp>
#include <jank/util/escape.hpp>
#include <jank/detail/to_runtime_data.hpp>

/* The strategy for codegen to C++ is quite simple. Codegen always happens on a
Expand Down Expand Up @@ -181,7 +181,7 @@ namespace jank::codegen
{
fmt::format_to(inserter,
"jank::make_box<jank::runtime::obj::persistent_string>({})",
escaped(typed_o->data));
util::escaped_quoted_view(typed_o->data));
}
else if constexpr(std::same_as<T, runtime::obj::persistent_vector>)
{
Expand Down
8 changes: 8 additions & 0 deletions src/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ namespace jank::read
{
}

/* Builds a token with position `p`, size `s` and kind `k`, whose data is a
 * native_persistent_string_view constructed from the C string `d`.
 * NOTE(review): presumably this overload exists so that string literals do
 * not select the native_bool overload through pointer-to-bool conversion;
 * confirm. Assumes `d` is non-null and null-terminated, as building a view
 * from a bare pointer normally requires. */
token::token(size_t const p, size_t const s, token_kind const k, char const * const d)
: pos{ p }
, size{ s }
, kind{ k }
, data{ native_persistent_string_view{ d } }
{
}

token::token(size_t const p, size_t const s, token_kind const k, native_bool const d)
: pos{ p }
, size{ s }
Expand Down
122 changes: 44 additions & 78 deletions src/cpp/jank/read/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,10 @@
#include <jank/runtime/obj/keyword.hpp>
#include <jank/runtime/obj/persistent_string.hpp>
#include <jank/read/parse.hpp>
#include <jank/util/escape.hpp>

namespace jank::read::parse
{
namespace detail
{
  /* Replaces each backslash escape in `input` with the character it stands
   * for and returns the resulting string.
   *
   * The recognized escapes are: backslash followed by n (newline), t (tab),
   * r (carriage return), a second backslash, or a double quote. Any other
   * character following a backslash produces an error naming the sequence.
   *
   * A backslash that is the very last character of `input` has nothing to
   * escape; it is reported as an error instead of being silently dropped. */
  string_result<native_transient_string> unescape(native_transient_string const &input)
  {
    native_transient_string ss;
    /* The output is never longer than the input, so one reservation suffices. */
    ss.reserve(input.size());
    /* True while the previous character was an unconsumed backslash. */
    native_bool escape{};

    for(auto const c : input)
    {
      if(!escape)
      {
        if(c == '\\')
        {
          escape = true;
        }
        else
        {
          ss += c;
        }
        continue;
      }

      switch(c)
      {
        case 'n':
          ss += '\n';
          break;
        case 't':
          ss += '\t';
          break;
        case 'r':
          ss += '\r';
          break;
        case '\\':
          ss += '\\';
          break;
        case '"':
          ss += '"';
          break;
        default:
          return err(fmt::format("invalid escape sequence: \\{}", c));
      }
      escape = false;
    }

    /* Still inside an escape once the input is exhausted: the final backslash
     * was never completed. Built with fmt::format so the error value has the
     * same type as the one produced in the switch above. */
    if(escape)
    {
      return err(fmt::format("invalid escape sequence: trailing backslash"));
    }

    return ok(ss);
  }
}

native_bool
processor::object_source_info::operator==(processor::object_source_info const &rhs) const
{
Expand All @@ -75,7 +25,7 @@ namespace jank::read::parse
/* Two source infos differ when they refer to different objects or when
 * either of their start/end tokens differs.
 * NOTE(review): operator== should compare the same three fields; its body
 * is not visible here, so confirm the two stay in agreement. */
native_bool
processor::object_source_info::operator!=(processor::object_source_info const &rhs) const
{
  return ptr != rhs.ptr || start != rhs.start || end != rhs.end;
}

processor::iterator::value_type processor::iterator::operator*() const
Expand Down Expand Up @@ -133,8 +83,8 @@ namespace jank::read::parse
{
return token_result.err().unwrap();
}
auto token(token_result.expect_ok());
switch(token.kind)
latest_token = token_result.expect_ok();
switch(latest_token.kind)
{
/* We ignore comments, but everything else returns out of the loop. */
case lex::token_kind::comment:
Expand All @@ -150,10 +100,10 @@ namespace jank::read::parse
case lex::token_kind::close_square_bracket:
case lex::token_kind::close_paren:
case lex::token_kind::close_curly_bracket:
if(expected_closer != token.kind)
if(expected_closer != latest_token.kind)
{
return err(
error{ token.pos, native_persistent_string{ "unexpected closing character" } });
return err(error{ latest_token.pos,
native_persistent_string{ "unexpected closing character" } });
}
++token_current;
expected_closer = none;
Expand Down Expand Up @@ -181,16 +131,16 @@ namespace jank::read::parse
default:
{
native_persistent_string msg{ fmt::format("unexpected token kind: {}",
magic_enum::enum_name(token.kind)) };
return err(error{ token.pos, std::move(msg) });
magic_enum::enum_name(latest_token.kind)) };
return err(error{ latest_token.pos, std::move(msg) });
}
}
}
}

processor::object_result processor::parse_list()
{
auto const start_token(token_current.latest.unwrap().expect_ok());
auto const start_token((*token_current).expect_ok());
++token_current;
auto const prev_expected_closer(expected_closer);
expected_closer = some(lex::token_kind::close_paren);
Expand All @@ -210,12 +160,14 @@ namespace jank::read::parse
}

expected_closer = prev_expected_closer;
return object_source_info{ make_box<runtime::obj::persistent_list>(ret.rbegin(), ret.rend()) };
return object_source_info{ make_box<runtime::obj::persistent_list>(ret.rbegin(), ret.rend()),
start_token,
latest_token };
}

processor::object_result processor::parse_vector()
{
auto const start_token(token_current.latest.unwrap().expect_ok());
auto const start_token((*token_current).expect_ok());
++token_current;
auto const prev_expected_closer(expected_closer);
expected_closer = some(lex::token_kind::close_square_bracket);
Expand All @@ -235,13 +187,15 @@ namespace jank::read::parse
}

expected_closer = prev_expected_closer;
return object_source_info{ make_box<runtime::obj::persistent_vector>(ret.persistent()) };
return object_source_info{ make_box<runtime::obj::persistent_vector>(ret.persistent()),
start_token,
latest_token };
}

/* TODO: Uniqueness check. */
processor::object_result processor::parse_map()
{
auto const start_token(token_current.latest.unwrap().expect_ok());
auto const start_token((*token_current).expect_ok());
++token_current;
auto const prev_expected_closer(expected_closer);
expected_closer = some(lex::token_kind::close_curly_bracket);
Expand Down Expand Up @@ -274,12 +228,14 @@ namespace jank::read::parse
}

expected_closer = prev_expected_closer;
return object_source_info{ make_box<runtime::obj::persistent_array_map>(ret) };
return object_source_info{ make_box<runtime::obj::persistent_array_map>(ret),
start_token,
latest_token };
}

processor::object_result processor::parse_quote()
{
auto const start_token(token_current.latest.unwrap().expect_ok());
auto const start_token((*token_current).expect_ok());
++token_current;
auto const old_quoted(quoted);
quoted = true;
Expand All @@ -294,23 +250,25 @@ namespace jank::read::parse
return err(error{ start_token.pos, native_persistent_string{ "invalid value after quote" } });
}

return object_source_info{ runtime::erase(
make_box<runtime::obj::persistent_list>(make_box<runtime::obj::symbol>("quote"),
val_result.expect_ok().unwrap().ptr)) };
return object_source_info{ runtime::erase(make_box<runtime::obj::persistent_list>(
make_box<runtime::obj::symbol>("quote"),
val_result.expect_ok().unwrap().ptr)),
start_token,
latest_token };
}

/* Consumes the current token and returns the nil constant, tagged with
 * latest_token as both its start and end token.
 * NOTE(review): this relies on latest_token still holding the nil token when
 * we get here; presumably next() stores it right before dispatching. Unlike
 * the sibling parsers, the token is not re-read from token_current. */
processor::object_result processor::parse_nil()
{
  ++token_current;
  return object_source_info{ runtime::obj::nil::nil_const(), latest_token, latest_token };
}

/* Parses the current token into a boxed boolean and tags the result with
 * that token as both its start and end. */
processor::object_result processor::parse_boolean()
{
  /* Copy the token out before advancing past it. */
  auto const token((*token_current).expect_ok());
  ++token_current;
  auto const b(boost::get<native_bool>(token.data));
  return object_source_info{ make_box<runtime::obj::boolean>(b), token, token };
}

processor::object_result processor::parse_symbol()
Expand Down Expand Up @@ -352,7 +310,7 @@ namespace jank::read::parse
{
name = sv;
}
return object_source_info{ make_box<runtime::obj::symbol>(ns, name) };
return object_source_info{ make_box<runtime::obj::symbol>(ns, name), token, token };
}

processor::object_result processor::parse_keyword()
Expand Down Expand Up @@ -388,22 +346,26 @@ namespace jank::read::parse
{
return err(intern_res.expect_err());
}
return object_source_info{ intern_res.expect_ok() };
return object_source_info{ intern_res.expect_ok(), token, token };
}

/* Parses the current token into a boxed integer and tags the result with
 * that token as both its start and end. */
processor::object_result processor::parse_integer()
{
  /* Copy the token out before advancing past it. */
  auto const token(token_current->expect_ok());
  ++token_current;
  return object_source_info{ make_box<runtime::obj::integer>(
                               boost::get<native_integer>(token.data)),
                             token,
                             token };
}

/* Parses the current token into a boxed real and tags the result with that
 * token as both its start and end. */
processor::object_result processor::parse_real()
{
  /* Copy the token out before advancing past it. */
  auto const token(token_current->expect_ok());
  ++token_current;
  return object_source_info{ make_box<runtime::obj::real>(boost::get<native_real>(token.data)),
                             token,
                             token };
}

processor::object_result processor::parse_string()
Expand All @@ -412,20 +374,24 @@ namespace jank::read::parse
++token_current;
auto const sv(boost::get<native_persistent_string_view>(token.data));
return object_source_info{ make_box<runtime::obj::persistent_string>(
native_persistent_string{ sv.data(), sv.size() }) };
native_persistent_string{ sv.data(), sv.size() }),
token,
token };
}

/* Parses the current token, whose data is a string view containing escape
 * sequences, into a boxed persistent string.
 *
 * The text is run through util::unescape. If that fails, its error is
 * returned at the token's position; otherwise the result is tagged with the
 * token as both its start and end. */
processor::object_result processor::parse_escaped_string()
{
  /* Copy the token out before advancing past it. */
  auto const token(token_current->expect_ok());
  ++token_current;
  auto const sv(boost::get<native_persistent_string_view>(token.data));
  auto res(util::unescape({ sv.data(), sv.size() }));
  if(res.is_err())
  {
    return err(error{ token.pos, res.expect_err_move() });
  }
  return object_source_info{ make_box<runtime::obj::persistent_string>(res.expect_ok_move()),
                             token,
                             token };
}

processor::iterator processor::begin()
Expand Down
Loading

0 comments on commit 9a134d4

Please sign in to comment.