Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve comments heuristic #389

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions charon/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions charon/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ which = "6.0.1"

hax-frontend-exporter = { git = "https://github.com/hacspec/hax", branch = "main", optional = true }
macros = { path = "./macros" }
by_address = "1.2.1"

[features]
default = ["rustc"]
Expand Down
11 changes: 5 additions & 6 deletions charon/src/ast/gast.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
//! Definitions common to [crate::ullbc_ast] and [crate::llbc_ast]
pub use super::gast_utils::*;
use crate::expressions::*;
use crate::ast::*;
use crate::generate_index_type;
use crate::ids::Vector;
use crate::llbc_ast;
use crate::meta::{ItemMeta, Span};
use crate::names::Name;
use crate::types::*;
use crate::ullbc_ast;
use crate::values::*;
use derive_visitor::{Drive, DriveMut, Event, Visitor, VisitorMut};
use macros::EnumIsA;
use macros::EnumToGetters;
Expand Down Expand Up @@ -50,10 +48,11 @@ pub struct GExprBody<T> {
/// - the input arguments
/// - the remaining locals, used for the intermediate computations
pub locals: Vector<VarId, Var>,
/// For each line inside the body, we record any whole-line `//` comments found before it. They
/// are added to statements in the late `recover_body_comments` pass.
/// For each line inside the body, we record any whole-line `//` comments found before it. The
/// `Loc` is the location of the start of the (non-comment) line. They are added to statements
/// in the late `recover_body_comments` pass.
#[charon::opaque]
pub comments: Vec<(usize, Vec<String>)>,
pub comments: Vec<(Loc, Vec<String>)>,
pub body: T,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1226,7 +1226,7 @@ impl<'tcx, 'ctx, 'ctx1> BodyTransCtx<'tcx, 'ctx, 'ctx1> {
}

/// Gather all the lines that start with `//` inside the given span.
fn translate_body_comments(&mut self, charon_span: Span) -> Vec<(usize, Vec<String>)> {
fn translate_body_comments(&mut self, charon_span: Span) -> Vec<(Loc, Vec<String>)> {
let rust_span = charon_span.rust_span();
let source_map = self.t_ctx.tcx.sess.source_map();
if rust_span.ctxt().is_root()
Expand All @@ -1240,12 +1240,20 @@ impl<'tcx, 'ctx, 'ctx1> BodyTransCtx<'tcx, 'ctx, 'ctx1> {
// Compute the absolute line number
.map(|(i, line)| (charon_span.span.end.line - i, line))
// Extract the comment if this line starts with `//`
.map(|(line_nbr, line)| (line_nbr, line.trim_start().strip_prefix("//")))
.map(|(line_nbr, line)| {
let trimmed = line.trim_start();
// The location of the start of the line
let loc = Loc {
line: line_nbr,
col: line.len() - trimmed.len(),
};
(loc, trimmed.strip_prefix("//"))
})
.peekable()
.batching(|iter| {
// Get the next line. This is not a comment: it's either the last line of the
// body or a line that wasn't consumed by `peeking_take_while`.
let (line_nbr, _first) = iter.next()?;
let (loc, _first) = iter.next()?;
// Collect all the comments before this line.
let mut comments = iter
// `peeking_take_while` ensures we don't consume a line that returns
Expand All @@ -1256,7 +1264,7 @@ impl<'tcx, 'ctx, 'ctx1> BodyTransCtx<'tcx, 'ctx, 'ctx1> {
.map(str::to_owned)
.collect_vec();
comments.reverse();
Some((line_nbr, comments))
Some((loc, comments))
})
.filter(|(_, comments)| !comments.is_empty())
.collect_vec();
Expand Down
86 changes: 74 additions & 12 deletions charon/src/transform/recover_body_comments.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! Take all the comments found in the original body and assign them to statements.

use derive_visitor::{visitor_enter_fn_mut, DriveMut};
use std::cmp::max;
use std::collections::{HashMap, HashSet};

use derive_visitor::{visitor_enter_fn, visitor_enter_fn_mut, Drive, DriveMut};

use crate::llbc_ast::*;
use crate::transform::TransformCtx;
Expand All @@ -9,21 +12,80 @@ use super::ctx::LlbcPass;

pub struct Transform;
impl LlbcPass for Transform {
// Constraints in the ideal case:
// - each comment should be assigned to at most one statement;
// - the order of comments in the source should refine the partial order of control flow;
// - a comment should come before the statement it was applied to.
// We approximate this with a reasonable heuristic.
//
// We may drop some comments if no statement starts with the relevant line (this can happen if
// e.g. the statement was optimized out or the comment applied to an item instead).
fn transform_body(&self, _ctx: &mut TransformCtx<'_>, b: &mut ExprBody) {
// Constraints in the ideal case:
// - each comment should be assigned to exactly one statement;
// - the order of comments in the source should refine the partial order of control flow;
// - a comment should come before the statement it was applied to.
let mut statements_for_line: HashMap<usize, HashSet<ByAddress<&Statement>>> =
Default::default();
b.body.drive(&mut visitor_enter_fn(|st: &Statement| {
let span = st.span;
let end_line = statements_for_line
.entry(span.span.beg.line)
.or_insert(span.span.beg.line);
*end_line = max(*end_line, span.span.end.line);
}));

// For each source line (that a comment may apply to), we try to compute the set of lines
// that are spanned by the statement/expression that starts on that line. This assumes
// standard one-statement-per-line rust formatting.
// We store for each start line the end line.
let mut lines_covered_by_statement: HashMap<usize, usize> = Default::default();
b.body.drive(&mut visitor_enter_fn(|st: &Statement| {
let span = st.span;
let end_line = lines_covered_by_statement
.entry(span.span.beg.line)
.or_insert(span.span.beg.line);
*end_line = max(*end_line, span.span.end.line);
}));

// TODO: order by statement kind: call, assign
// Find for each line the statement span that starts the earliest as this is more likely to
// correspond to what the comment was intended to point to.
let mut best_span_for_line: HashMap<usize, Span> = Default::default();
b.body.drive(&mut visitor_enter_fn(|st: &Statement| {
if matches!(st.content, RawStatement::FakeRead(_)) {
// These are added after many `let` statements and mess up the comments.
return;
}
let span = st.span;
best_span_for_line
.entry(span.span.beg.line)
.and_modify(|best_span| {
// Find the span that starts the earliest, and among these the largest span.
if span.span.beg.col < best_span.span.beg.col
|| (span.span.beg.col == best_span.span.beg.col
&& span.span.end > best_span.span.end)
{
*best_span = span
}
})
.or_insert(span);
}));

// This is a pretty simple heuristic which is good enough for now.
let mut comments: Vec<(usize, Vec<String>)> = b.comments.clone();
// The map of lines to comments that apply to it.
let mut comments_per_line: HashMap<usize, Vec<String>> = b
.comments
.iter()
.cloned()
.map(|(loc, comments)| (loc.line, comments))
.collect();
// Assign each comment to the first statement that has the best span for its starting line.
b.body
.drive_mut(&mut visitor_enter_fn_mut(|st: &mut Statement| {
let st_line = st.span.span.beg.line;
st.comments_before = comments
.extract_if(|(i, _)| *i <= st_line)
.flat_map(|(_, comments)| comments)
.collect();
if best_span_for_line
.get(&st.span.span.beg.line)
.is_some_and(|best_span| *best_span == st.span)
{
st.comments_before = comments_per_line
.remove(&st.span.span.beg.line)
.unwrap_or_default()
}
}));
}
}
2 changes: 1 addition & 1 deletion charon/tests/cargo/workspace.out
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ fn crate2::extra_random_number() -> u32
let @0: u32; // return
let @1: u32; // anonymous local

// Even more random.
@1 := crate1::random_number()
// Even more random.
@0 := move (@1) + const (1 : u32)
drop @1
return
Expand Down
14 changes: 7 additions & 7 deletions charon/tests/ui/arrays.out
Original file line number Diff line number Diff line change
Expand Up @@ -757,37 +757,37 @@ fn test_crate::take_all()

x@1 := [const (0 : u32), const (0 : u32); 2 : usize]
@fake_read(x@1)
// x is deep copied (copy node appears in Charon, followed by a move)
@3 := copy (x@1)
// x is deep copied (copy node appears in Charon, followed by a move)
@2 := test_crate::take_array(move (@3))
drop @3
drop @2
@5 := copy (x@1)
@4 := test_crate::take_array(move (@5))
drop @5
drop @4
// x passed by address, there is a reborrow here
@8 := &x@1
@7 := &*(@8)
// x passed by address, there is a reborrow here
@6 := test_crate::take_array_borrow(move (@7))
drop @7
drop @8
drop @6
// automatic cast from array to slice (spanning entire array)
@12 := &x@1
@11 := &*(@12)
@10 := @ArrayToSliceShared<'_, u32, 2 : usize>(move (@11))
drop @11
// automatic cast from array to slice (spanning entire array)
@9 := test_crate::take_slice(move (@10))
drop @10
drop @12
drop @9
// note that both appear as SliceNew expressions, meaning the SliceNew UnOp is overloaded for
// mut and non-mut cases
@16 := &mut x@1
@15 := &mut *(@16)
@14 := @ArrayToSliceMut<'_, u32, 2 : usize>(move (@15))
drop @15
// note that both appear as SliceNew expressions, meaning the SliceNew UnOp is overloaded for
// mut and non-mut cases
@13 := test_crate::take_mut_slice(move (@14))
drop @14
drop @16
Expand Down Expand Up @@ -1074,14 +1074,14 @@ fn test_crate::range_all()

x@1 := [const (0 : u32), const (0 : u32), const (0 : u32), const (0 : u32); 4 : usize]
@fake_read(x@1)
// CONFIRM: there is no way to shrink [T;N] into [T;M] with M<N?
@6 := &mut x@1
@7 := core::ops::range::Range { start: const (1 : usize), end: const (3 : usize) }
@5 := core::array::{impl core::ops::index::IndexMut<I> for Array<T, const N : usize>}#16<u32, core::ops::range::Range<usize, core::marker::Sized<usize>>, 4 : usize>[core::marker::Sized<u32>, core::marker::Sized<core::ops::range::Range<usize, core::marker::Sized<usize>>>, core::slice::index::{impl core::ops::index::IndexMut<I> for Slice<T>}#1<u32, core::ops::range::Range<usize, core::marker::Sized<usize>>>[core::marker::Sized<u32>, core::marker::Sized<core::ops::range::Range<usize, core::marker::Sized<usize>>>, core::slice::index::{impl core::slice::index::SliceIndex<Slice<T>> for core::ops::range::Range<usize, core::marker::Sized<usize>>}#4<u32>[core::marker::Sized<u32>]]]::index_mut(move (@6), move (@7))
drop @7
drop @6
@4 := &mut *(@5)
@3 := &two-phase-mut *(@4)
// CONFIRM: there is no way to shrink [T;N] into [T;M] with M<N?
@2 := test_crate::update_mut_slice(move (@3))
drop @3
drop @5
Expand Down Expand Up @@ -1162,9 +1162,9 @@ fn test_crate::non_copyable_array()
drop @3
drop @2
@fake_read(x@1)
@5 := move (x@1)
// x is moved (not deep copied!)
// TODO: determine whether the translation needs to be aware of that and pass by ref instead of by copy
@5 := move (x@1)
@4 := test_crate::take_array_t(move (@5))
drop @5
drop @4
Expand Down
22 changes: 10 additions & 12 deletions charon/tests/ui/comments.out
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ fn test_crate::sum<'_0>(@1: &'_0 (Slice<u32>)) -> u32
// unindented sub-comment
i@3 := const (0 : usize)
@fake_read(i@3)
// Function call
@6 := copy (sum@2)
@5 := move (@6) + const (2 : u32)
drop @6
// Function call
@4 := test_crate::function_call(move (@5))
drop @5
drop @4
Expand All @@ -74,11 +74,11 @@ fn test_crate::sum<'_0>(@1: &'_0 (Slice<u32>)) -> u32
if move (@9) {
drop @11
drop @10
// Add to running sum
@14 := copy (i@3)
@25 := &*(s@1)
@26 := @SliceIndexShared<'_, u32>(move (@25), copy (@14))
@13 := copy (*(@26))
// Add to running sum
sum@2 := copy (sum@2) + move (@13)
drop @13
drop @14
Expand All @@ -101,13 +101,12 @@ fn test_crate::sum<'_0>(@1: &'_0 (Slice<u32>)) -> u32
drop @15
drop @9
drop @7
// Assign the result of an `if`.
@18 := copy (sum@2)
@17 := move (@18) > const (10 : u32)
if move (@17) {
drop @18
// sum + 100
@19 := copy (sum@2)
// sum + 100
@16 := move (@19) + const (100 : u32)
drop @19
}
Expand All @@ -117,12 +116,13 @@ fn test_crate::sum<'_0>(@1: &'_0 (Slice<u32>)) -> u32
@16 := copy (sum@2)
}
drop @17
// Assign the result of an `if`.
sum@2 := move (@16)
drop @16
// Function call
@22 := copy (sum@2)
@21 := move (@22) + const (2 : u32)
drop @22
// Function call
@20 := test_crate::function_call(move (@21))
drop @21
drop @20
Expand Down Expand Up @@ -275,35 +275,34 @@ fn test_crate::foo()
let @36: &'_ (Array<u32, 10 : usize>); // anonymous local
let @37: &'_ (u32); // anonymous local

// Call `default` and destructure the result
@3 := test_crate::{impl core::default::Default for test_crate::Foo}::default()
@fake_read(@3)
// Call `default` and destructure the result
x@1 := copy ((@3).x)
y@2 := copy ((@3).y)
drop @3
// Call `eat` on an aggregate value
@6 := copy (x@1)
@7 := copy (y@2)
@5 := test_crate::Foo { x: move (@6), y: move (@7) }
drop @7
drop @6
// Call `eat` on an aggregate value
@4 := test_crate::eat<test_crate::Foo>[core::marker::Sized<test_crate::Foo>](move (@5))
drop @5
drop @4
// Call `default` and destructure the result
// This is the long field
@10 := test_crate::{impl core::default::Default for test_crate::Bar}#1::default()
@fake_read(@10)
x@8 := copy ((@10).x)
// This is the long field
super_long_field_name@9 := copy ((@10).super_long_field_name)
drop @10
// Call `eat` on an aggregate value
@13 := copy (x@8)
// This is the long field
@14 := copy (super_long_field_name@9)
@12 := test_crate::Bar { x: move (@13), super_long_field_name: move (@14) }
drop @14
drop @13
// Call `eat` on an aggregate value
@11 := test_crate::eat<test_crate::Bar>[core::marker::Sized<test_crate::Bar>](move (@12))
drop @12
drop @11
Expand Down Expand Up @@ -380,9 +379,9 @@ fn test_crate::thing()
let @5: u32; // anonymous local
let @6: (); // anonymous local

// This comment belongs above the assignment to `x` and not above intermediate computations.
@5 := test_crate::CONSTANT
@2 := move (@5) >> const (3 : i32)
// This comment belongs above the assignment to `x` and not above intermediate computations.
x@1 := move (@2) + const (12 : u32)
drop @2
@fake_read(x@1)
Expand All @@ -403,7 +402,6 @@ fn test_crate::fake_read(@1: u32)
let x@1: u32; // arg #1
let @2: (); // anonymous local

// This statement is translated to a `fake_read`.
@fake_read(x@1)
@2 := ()
@0 := move (@2)
Expand Down
Loading
Loading