Skip to content

Commit

Permalink
Refactor resolving a location into a SemIR library (#4876)
Browse files Browse the repository at this point in the history
At present, lower depends on `Check::SemIRDiagnosticConverter` for debug
info. That was to support a quick implementation of debug info, but
isn't great because it's both an unusual dependency on check's
implementation, and relying on diagnostic structures for debug info.

This cleans that up by splitting relevant logic out to a library in
sem_ir, and having lowering use sem_ir's library instead of check's.
Additionally, a small refactoring of `Parse::TreeAndSubtrees` to allow
getting locations in lowering without going through a `DiagnosticLoc`.
I'm adding `Parse::GetTreeAndSubtreesFn` in because it's a complex
signature to have in so many spots.

I chose to have `ResolveNodeId` return a `SmallVector` because it seemed
likely to be fairly compact, but that could also be using an optional
callback to handle resolved node IDs, possibly just returning the last
entry. This could be switched if preferred.

Note this change shouldn't affect behavior, it's just moving code
around.

---------

Co-authored-by: Chandler Carruth <chandlerc@gmail.com>
  • Loading branch information
jonmeow and chandlerc authored Feb 6, 2025
1 parent 34ceb6b commit 7eee9a3
Show file tree
Hide file tree
Showing 19 changed files with 330 additions and 200 deletions.
2 changes: 2 additions & 0 deletions toolchain/check/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,10 @@ cc_library(
"//toolchain/diagnostics:diagnostic_emitter",
"//toolchain/lex:token_index",
"//toolchain/parse:tree",
"//toolchain/sem_ir:absolute_node_id",
"//toolchain/sem_ir:file",
"//toolchain/sem_ir:stringify_type",
"//toolchain/sem_ir:typed_insts",
"@llvm-project//llvm:Support",
],
)
3 changes: 1 addition & 2 deletions toolchain/check/check.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ struct Unit {
Timings* timings;

// Returns a lazily constructed TreeAndSubtrees.
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees;
Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees;

// The unit's SemIR, provided as empty and filled in by CheckParseTrees.
SemIR::File* sem_ir;
Expand Down
6 changes: 2 additions & 4 deletions toolchain/check/check_unit.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ class UnitAndImportsDiagnosticConverter
: public DiagnosticConverter<Parse::NodeId> {
public:
explicit UnitAndImportsDiagnosticConverter(
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees)
Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees)
: get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees) {}

auto ConvertLoc(Parse::NodeId node_id, ContextFnT /*context_fn*/) const
Expand All @@ -59,8 +58,7 @@ class UnitAndImportsDiagnosticConverter
}

private:
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees_;
Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees_;
};

// Contains information accumulated while checking a `Unit` (primarily import
Expand Down
3 changes: 1 addition & 2 deletions toolchain/check/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@
namespace Carbon::Check {

Context::Context(DiagnosticEmitter* emitter,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees,
SemIR::File* sem_ir, int imported_ir_count, int total_ir_count,
llvm::raw_ostream* vlog_stream)
: emitter_(emitter),
Expand Down
6 changes: 2 additions & 4 deletions toolchain/check/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ class Context {

// Stores references for work.
explicit Context(DiagnosticEmitter* emitter,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees,
SemIR::File* sem_ir, int imported_ir_count,
int total_ir_count, llvm::raw_ostream* vlog_stream);

Expand Down Expand Up @@ -748,8 +747,7 @@ class Context {
DiagnosticEmitter* emitter_;

// Returns a lazily constructed TreeAndSubtrees.
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees_;
Parse::GetTreeAndSubtreesFn get_parse_tree_and_subtrees_;

// The SemIR::File being added to.
SemIR::File* sem_ir_;
Expand Down
134 changes: 25 additions & 109 deletions toolchain/check/sem_ir_diagnostic_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "toolchain/check/sem_ir_diagnostic_converter.h"

#include "common/raw_string_ostream.h"
#include "toolchain/sem_ir/absolute_node_id.h"
#include "toolchain/sem_ir/stringify_type.h"

namespace Carbon::Check {
Expand Down Expand Up @@ -35,109 +36,34 @@ auto SemIRDiagnosticConverter::ConvertLoc(SemIRLoc loc,
auto SemIRDiagnosticConverter::ConvertLocImpl(SemIRLoc loc,
ContextFnT context_fn) const
-> ConvertedDiagnosticLoc {
// Cursors for the current IR and instruction in that IR.
const auto* cursor_ir = sem_ir_;
auto cursor_inst_id = SemIR::InstId::None;

// Notes an import on the diagnostic and updates cursors to point at the
// imported IR.
auto follow_import_ref = [&](SemIR::ImportIRInstId import_ir_inst_id) {
auto import_ir_inst = cursor_ir->import_ir_insts().Get(import_ir_inst_id);
const auto& import_ir = cursor_ir->import_irs().Get(import_ir_inst.ir_id);
CARBON_CHECK(import_ir.decl_id.has_value(),
"If we get `None` locations here, we may need to more "
"thoroughly track ImportDecls.");

ConvertedDiagnosticLoc in_import_loc;
auto import_loc_id = cursor_ir->insts().GetLocId(import_ir.decl_id);
if (import_loc_id.is_node_id()) {
// For imports in the current file, the location is simple.
in_import_loc = ConvertLocInFile(cursor_ir, import_loc_id.node_id(),
loc.token_only_, context_fn);
} else if (import_loc_id.is_import_ir_inst_id()) {
// For implicit imports, we need to unravel the location a little
// further.
auto implicit_import_ir_inst =
cursor_ir->import_ir_insts().Get(import_loc_id.import_ir_inst_id());
const auto& implicit_ir =
cursor_ir->import_irs().Get(implicit_import_ir_inst.ir_id);
auto implicit_loc_id =
implicit_ir.sem_ir->insts().GetLocId(implicit_import_ir_inst.inst_id);
CARBON_CHECK(implicit_loc_id.is_node_id(),
"Should only be one layer of implicit imports");
in_import_loc =
ConvertLocInFile(implicit_ir.sem_ir, implicit_loc_id.node_id(),
loc.token_only_, context_fn);
}

// TODO: Add an "In implicit import of prelude." note for the case where we
// don't have a location.
if (import_loc_id.has_value()) {
// TODO: Include the name of the imported library in the diagnostic.
CARBON_DIAGNOSTIC(InImport, LocationInfo, "in import");
context_fn(in_import_loc.loc, InImport);
}

cursor_ir = import_ir.sem_ir;
cursor_inst_id = import_ir_inst.inst_id;
};

// If the location is is an import, follows it and returns nullopt.
// Otherwise, it's a parse node, so return the final location.
auto handle_loc =
[&](SemIR::LocId loc_id) -> std::optional<ConvertedDiagnosticLoc> {
if (loc_id.is_import_ir_inst_id()) {
follow_import_ref(loc_id.import_ir_inst_id());
return std::nullopt;
} else {
// Parse nodes always refer to the current IR.
return ConvertLocInFile(cursor_ir, loc_id.node_id(), loc.token_only_,
context_fn);
}
};

// Handle the base location.
if (loc.is_inst_id_) {
cursor_inst_id = loc.inst_id_;
} else {
if (auto diag_loc = handle_loc(loc.loc_id_)) {
return *diag_loc;
llvm::SmallVector<SemIR::AbsoluteNodeId> absolute_node_ids =
loc.is_inst_id_ ? SemIR::GetAbsoluteNodeId(sem_ir_, loc.inst_id_)
: SemIR::GetAbsoluteNodeId(sem_ir_, loc.loc_id_);

auto final_node_id = absolute_node_ids.pop_back_val();
for (const auto& absolute_node_id : absolute_node_ids) {
if (!absolute_node_id.node_id.has_value()) {
// TODO: Add an "In implicit import of prelude." note for the case where
// we don't have a location.
continue;
}
CARBON_CHECK(cursor_inst_id.has_value(), "Should have been set");
// TODO: Include the name of the imported library in the diagnostic.
auto diag_loc =
ConvertLocInFile(absolute_node_id, loc.token_only_, context_fn);
CARBON_DIAGNOSTIC(InImport, LocationInfo, "in import");
context_fn(diag_loc.loc, InImport);
}

while (true) {
if (cursor_inst_id.has_value()) {
auto cursor_inst = cursor_ir->insts().Get(cursor_inst_id);
if (auto bind_ref = cursor_inst.TryAs<SemIR::ExportDecl>();
bind_ref && bind_ref->value_id.has_value()) {
cursor_inst_id = bind_ref->value_id;
continue;
}

// If the parse node has a value, use it for the location.
if (auto loc_id = cursor_ir->insts().GetLocId(cursor_inst_id);
loc_id.has_value()) {
if (auto diag_loc = handle_loc(loc_id)) {
return *diag_loc;
}
continue;
}

// If a namespace has an instruction for an import, switch to looking at
// it.
if (auto ns = cursor_inst.TryAs<SemIR::Namespace>()) {
if (ns->import_id.has_value()) {
cursor_inst_id = ns->import_id;
continue;
}
}
}
return ConvertLocInFile(final_node_id, loc.token_only_, context_fn);
}

// `None` parse node but not an import; just nothing to point at.
return ConvertLocInFile(cursor_ir, Parse::NodeId::None, loc.token_only_,
context_fn);
}
auto SemIRDiagnosticConverter::ConvertLocInFile(
SemIR::AbsoluteNodeId absolute_node_id, bool token_only,
ContextFnT /*context_fn*/) const -> ConvertedDiagnosticLoc {
const auto& tree_and_subtrees =
imported_trees_and_subtrees_[absolute_node_id.check_ir_id.index]();
return tree_and_subtrees.NodeToDiagnosticLoc(absolute_node_id.node_id,
token_only);
}

auto SemIRDiagnosticConverter::ConvertArg(llvm::Any arg) const -> llvm::Any {
Expand Down Expand Up @@ -195,14 +121,4 @@ auto SemIRDiagnosticConverter::ConvertArg(llvm::Any arg) const -> llvm::Any {
return DiagnosticConverter<SemIRLoc>::ConvertArg(arg);
}

auto SemIRDiagnosticConverter::ConvertLocInFile(const SemIR::File* sem_ir,
Parse::NodeId node_id,
bool token_only,
ContextFnT /*context_fn*/) const
-> ConvertedDiagnosticLoc {
const auto& tree_and_subtrees =
imported_trees_and_subtrees_[sem_ir->check_ir_id().index]();
return tree_and_subtrees.NodeToDiagnosticLoc(node_id, token_only);
}

} // namespace Carbon::Check
13 changes: 6 additions & 7 deletions toolchain/check/sem_ir_diagnostic_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/lex/token_index.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/absolute_node_id.h"
#include "toolchain/sem_ir/file.h"
#include "toolchain/sem_ir/ids.h"

namespace Carbon::Check {

// Handles the transformation of a SemIRLoc to a DiagnosticLoc.
class SemIRDiagnosticConverter : public DiagnosticConverter<SemIRLoc> {
public:
using TreeFnT = llvm::function_ref<const Parse::TreeAndSubtrees&()>;

explicit SemIRDiagnosticConverter(
llvm::ArrayRef<TreeFnT> imported_trees_and_subtrees,
llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> imported_trees_and_subtrees,
const SemIR::File* sem_ir)
: imported_trees_and_subtrees_(imported_trees_and_subtrees),
sem_ir_(sem_ir) {}
Expand Down Expand Up @@ -50,12 +50,11 @@ class SemIRDiagnosticConverter : public DiagnosticConverter<SemIRLoc> {

// Converts a node_id corresponding to a specific sem_ir to a diagnostic
// location.
auto ConvertLocInFile(const SemIR::File* sem_ir, Parse::NodeId node_id,
bool token_only, ContextFnT context_fn) const
-> ConvertedDiagnosticLoc;
auto ConvertLocInFile(SemIR::AbsoluteNodeId absolute_node_id, bool token_only,
ContextFnT context_fn) const -> ConvertedDiagnosticLoc;

// Converters for each SemIR.
llvm::ArrayRef<TreeFnT> imported_trees_and_subtrees_;
llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> imported_trees_and_subtrees_;

// The current SemIR being processed.
const SemIR::File* sem_ir_;
Expand Down
36 changes: 21 additions & 15 deletions toolchain/driver/compile_subcommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,19 +335,20 @@ class CompilationUnit {

// Prepares per-IR lazy fetch functions which may come up in cross-IR
// diagnostics.
auto PreCheck() -> llvm::function_ref<const Parse::TreeAndSubtrees&()>;
auto PreCheck() -> Parse::GetTreeAndSubtreesFn;

// Returns information needed to check this unit.
auto GetCheckUnit(
SemIR::CheckIRId check_ir_id,
llvm::ArrayRef<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
all_trees_and_subtrees) -> Check::Unit;
llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> all_trees_and_subtrees)
-> Check::Unit;

// Runs post-check logic. Returns true if checking succeeded for the IR.
auto PostCheck() -> void;

// Lower SemIR to LLVM IR.
auto RunLower() -> void;
auto RunLower(std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
all_trees_and_subtrees_for_debug_info) -> void;

auto RunCodeGen() -> void;

Expand Down Expand Up @@ -499,8 +500,7 @@ auto CompilationUnit::RunParse() -> void {
}
}

auto CompilationUnit::PreCheck()
-> llvm::function_ref<const Parse::TreeAndSubtrees&()> {
auto CompilationUnit::PreCheck() -> Parse::GetTreeAndSubtreesFn {
CARBON_CHECK(parse_tree_, "Must call RunParse first");
CARBON_CHECK(!get_parse_tree_and_subtrees_, "Called PreCheck twice");

Expand All @@ -512,8 +512,8 @@ auto CompilationUnit::PreCheck()

auto CompilationUnit::GetCheckUnit(
SemIR::CheckIRId check_ir_id,
llvm::ArrayRef<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
all_trees_and_subtrees) -> Check::Unit {
llvm::ArrayRef<Parse::GetTreeAndSubtreesFn> all_trees_and_subtrees)
-> Check::Unit {
CARBON_CHECK(get_parse_tree_and_subtrees_, "Must call PreCheck first");
CARBON_CHECK(!sem_ir_converter_, "Called GetCheckUnit twice");

Expand Down Expand Up @@ -587,15 +587,17 @@ auto CompilationUnit::PostCheck() -> void {
}
}

auto CompilationUnit::RunLower() -> void {
auto CompilationUnit::RunLower(
std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
all_trees_and_subtrees_for_debug_info) -> void {
LogCall("Lower::LowerToLLVM", "lower", [&] {
llvm_context_ = std::make_unique<llvm::LLVMContext>();
// TODO: Consider disabling instruction naming by default if we're not
// producing textual LLVM IR.
SemIR::InstNamer inst_namer(&*sem_ir_);
module_ = Lower::LowerToLLVM(*llvm_context_, options_.include_debug_info,
*sem_ir_converter_, input_filename_, *sem_ir_,
&inst_namer, vlog_stream_);
module_ = Lower::LowerToLLVM(
*llvm_context_, all_trees_and_subtrees_for_debug_info, input_filename_,
*sem_ir_, &inst_namer, vlog_stream_);
});
if (vlog_stream_) {
CARBON_VLOG("*** llvm::Module ***\n");
Expand Down Expand Up @@ -842,8 +844,7 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
}

// Pre-check assigns IR IDs and constructs node converters.
llvm::SmallVector<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
all_trees_and_subtrees;
llvm::SmallVector<Parse::GetTreeAndSubtreesFn> all_trees_and_subtrees;
// This size may not match due to units that are missing source, but that's an
// error case and not worth extra work.
all_trees_and_subtrees.reserve(units.size());
Expand Down Expand Up @@ -887,8 +888,13 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
}

// Lower.
std::optional<llvm::ArrayRef<Parse::GetTreeAndSubtreesFn>>
all_trees_and_subtrees_for_debug_info;
if (options_.include_debug_info) {
all_trees_and_subtrees_for_debug_info = all_trees_and_subtrees;
}
for (const auto& unit : units) {
unit->RunLower();
unit->RunLower(all_trees_and_subtrees_for_debug_info);
}
if (options_.phase == CompileOptions::Phase::Lower) {
return make_result();
Expand Down
2 changes: 2 additions & 0 deletions toolchain/lower/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ cc_library(
deps = [
":context",
"//toolchain/check:sem_ir_diagnostic_converter",
"//toolchain/parse:tree",
"//toolchain/sem_ir:file",
"//toolchain/sem_ir:inst_namer",
"@llvm-project//llvm:Core",
Expand Down Expand Up @@ -50,6 +51,7 @@ cc_library(
"//common:vlog",
"//toolchain/base:kind_switch",
"//toolchain/check:sem_ir_diagnostic_converter",
"//toolchain/sem_ir:absolute_node_id",
"//toolchain/sem_ir:entry_point",
"//toolchain/sem_ir:file",
"//toolchain/sem_ir:inst",
Expand Down
Loading

0 comments on commit 7eee9a3

Please sign in to comment.