Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Typst Corrections #442

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 118 additions & 9 deletions harper-typst/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use typst_translator::TypstTranslator;
use harper_core::{parsers::Parser, Token};
use itertools::Itertools;
use typst_syntax::{
ast::{AstNode, Markup},
Source,
ast::{AstNode, Expr, Markup},
Source, SyntaxNode,
};

/// A parser that wraps Harper's `PlainEnglish` parser allowing one to ingest Typst files.
Expand All @@ -25,14 +25,61 @@ impl Parser for Typst {

// Recurse through AST to create tokens
let parse_helper = TypstTranslator::new(&typst_document);
typst_tree
.exprs()
let mut buf = Vec::new();
let exprs = typst_tree.exprs().collect_vec();
let exprs = convert_parbreaks(&mut buf, &exprs);
exprs
.into_iter()
.filter_map(|ex| parse_helper.parse_expr(ex, OffsetCursor::new(&typst_document)))
.flatten()
.collect_vec()
}
}

/// Converts the newlines around certain elements into paragraph breaks.
///
/// This is accomplished here instead of in the translating module because at
/// this point the expressions still carry their semantic kind, which is what
/// decides whether a newline should separate paragraphs.
///
/// Newlines are separate expressions in the parse tree (as the `Space`
/// variant). A `Space` is swapped for a synthesized `Parbreak` when it has
/// both a predecessor and a successor in `exprs` and at least one of those two
/// neighbours is a `Heading` or a `List` item. The first and the last
/// expression are therefore always passed through unchanged.
///
/// NOTE(review): every `Space` next to a heading/list item qualifies; the code
/// does not check that the space actually contains a newline.
///
/// # Parameters
/// - `buf`: backing storage for the synthesized `Parbreak` nodes. Its previous
///   contents are discarded, and the returned expressions borrow from it.
/// - `exprs`: the sibling expressions to scan, in document order.
///
/// # Returns
/// A vector with the same length and order as `exprs`, in which the qualifying
/// `Space` entries have been replaced by `Expr::Parbreak`.
///
/// # Panics
/// Panics if a synthesized placeholder node cannot be cast to a `Parbreak`.
fn convert_parbreaks<'a>(buf: &'a mut Vec<SyntaxNode>, exprs: &'a [Expr]) -> Vec<Expr<'a>> {
    // Owned collection of nodes forcibly casted to paragraph breaks. There is
    // one node per input expression so that `buf[i]` is always the stand-in
    // for `exprs[i]`, carrying the span of the expression it may replace.
    *buf = exprs
        .iter()
        .map(|expr| {
            let mut node = SyntaxNode::placeholder(typst_syntax::SyntaxKind::Parbreak);
            node.synthesize(expr.span());
            node
        })
        .collect();

    // Elements whose adjacent newline should act as a paragraph boundary.
    let is_block = |expr: &Expr| matches!(expr, Expr::Heading(_) | Expr::List(_));

    let mut res: Vec<Expr<'a>> = Vec::with_capacity(exprs.len());
    for (i, expr) in exprs.iter().enumerate() {
        let prev = i.checked_sub(1).and_then(|p| exprs.get(p));
        let next = exprs.get(i + 1);

        let converted = match (prev, next) {
            // Only an interior `Space` (one with both neighbours) is eligible.
            (Some(prev), Some(next))
                if matches!(expr, Expr::Space(_)) && (is_block(prev) || is_block(next)) =>
            {
                let pbreak = typst_syntax::ast::Parbreak::from_untyped(&buf[i])
                    .expect("Unable to convert expression to Parbreak");
                Expr::Parbreak(pbreak)
            }
            // `Expr` is `Copy`, so a plain dereference is enough here.
            _ => *expr,
        };
        res.push(converted);
    }

    res
}

#[cfg(test)]
mod tests {
use itertools::Itertools;
Expand Down Expand Up @@ -189,7 +236,7 @@ mod tests {
&token_kinds.as_slice(),
&[
TokenKind::Word(_),
TokenKind::Newline(1),
TokenKind::ParagraphBreak,
TokenKind::Word(_)
]
))
Expand All @@ -216,10 +263,10 @@ mod tests {
}

#[test]
fn label_unlintable() {
fn label_ref_unlintable() {
let source = "= Header
<label>
Paragraph";
Paragraph @label";

let document = Document::new_curated(source, &Typst);
let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
Expand All @@ -229,10 +276,12 @@ mod tests {
&token_kinds.as_slice(),
&[
TokenKind::Word(_),
TokenKind::Newline(1),
TokenKind::ParagraphBreak,
TokenKind::Unlintable,
TokenKind::Newline(1),
TokenKind::Newline(_),
TokenKind::Word(_),
TokenKind::Space(_),
TokenKind::Unlintable,
]
))
}
Expand Down Expand Up @@ -297,4 +346,64 @@ mod tests {
]
));
}

/// Single line breaks inside a running paragraph must be reported as
/// `Newline(1)` tokens and never as paragraph breaks.
#[test]
fn newline_in_paragraph() {
    let source = "Paragraph with
newlines
not paragraph breaks";

    let document = Document::new_curated(source, &Typst);
    let token_kinds: Vec<_> = document.tokens().map(|token| token.kind).collect();
    dbg!(&token_kinds);

    assert!(matches!(
        token_kinds.as_slice(),
        [
            // First line: "Paragraph with"
            TokenKind::Word(_),
            TokenKind::Space(_),
            TokenKind::Word(_),
            TokenKind::Newline(1),
            // Second line: "newlines"
            TokenKind::Word(_),
            TokenKind::Newline(1),
            // Third line: "not paragraph breaks"
            TokenKind::Word(_),
            TokenKind::Space(_),
            TokenKind::Word(_),
            TokenKind::Space(_),
            TokenKind::Word(_),
        ]
    ))
}

/// The newline before each list item must be reported as a paragraph break,
/// including the one separating the introductory sentence from the list.
#[test]
fn parbreaks_in_list() {
    let source = "This is a list:
- p1
- p2
- p3";

    let document = Document::new_curated(source, &Typst);
    let token_kinds: Vec<_> = document.tokens().map(|token| token.kind).collect();
    dbg!(&token_kinds);

    assert!(matches!(
        token_kinds.as_slice(),
        [
            // Introductory sentence: "This is a list:"
            TokenKind::Word(_),
            TokenKind::Space(_),
            TokenKind::Word(_),
            TokenKind::Space(_),
            TokenKind::Word(_),
            TokenKind::Space(_),
            TokenKind::Word(_),
            TokenKind::Punctuation(Punctuation::Colon),
            // First item: "p1"
            TokenKind::ParagraphBreak,
            TokenKind::Word(_),
            // Second item: "p2"
            TokenKind::ParagraphBreak,
            TokenKind::Word(_),
            // Third item: "p3"
            TokenKind::ParagraphBreak,
            TokenKind::Word(_)
        ]
    ))
}
}
14 changes: 10 additions & 4 deletions harper-typst/src/typst_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,16 @@ impl<'a> TypstTranslator<'a> {

// Recurse on each element of an iterator
let iter_recurse = |exprs: &mut dyn Iterator<Item = Expr>| {
Some(exprs.filter_map(|e| recurse!(e)).flatten().collect_vec())
let mut buf = Vec::new();
let exprs = exprs.collect_vec();
let exprs = super::convert_parbreaks(&mut buf, &exprs);
Some(
exprs
.into_iter()
.filter_map(|e| recurse!(e))
.flatten()
.collect_vec(),
)
};

// Parse the parameters of a function or closure
Expand Down Expand Up @@ -225,9 +234,6 @@ impl<'a> TypstTranslator<'a> {
Expr::Strong(strong) => iter_recurse(&mut strong.body().exprs()),
Expr::Emph(emph) => iter_recurse(&mut emph.body().exprs()),
Expr::Link(a) => token!(a, TokenKind::Url),
Expr::Ref(a) => {
token!(a, TokenKind::Word(WordMetadata::default()))
}
Expr::Heading(heading) => iter_recurse(&mut heading.body().exprs()),
Expr::List(list_item) => iter_recurse(&mut list_item.body().exprs()),
Expr::Enum(enum_item) => iter_recurse(&mut enum_item.body().exprs()),
Expand Down
1 change: 1 addition & 0 deletions harper-typst/tests/run_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ macro_rules! create_test {
create_test!(complex_document.typ, 0);
create_test!(simplified_document.typ, 0);
create_test!(complex_document_with_spelling_mistakes.typ, 4);
create_test!(issue_399.typ, 3);
15 changes: 15 additions & 0 deletions harper-typst/tests/test_sources/issue_399.typ
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#problem[
4. Find all the $x$ values where the following function is discontinuous.
]

#solution[
$x=-2,0,3$
]

#aside[
at $x=-2$ jump discontinuity.

at $x=0$ infinite discontinuity.

at $x=3$ removable discontinuity. (can be removed via re-defining the domain to exclude that)
]
Loading