Skip to content

Commit

Permalink
tests(#230): add test using unicode apostrophe
Browse files: browse the repository at this point in the history
  • Loading branch information
grantlemons committed Dec 27, 2024
1 parent 935c85d commit 5336778
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 32 deletions.
12 changes: 5 additions & 7 deletions harper-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,14 @@ fn load_file(file: &Path) -> anyhow::Result<(Document, String)> {
let source = std::fs::read_to_string(file)?;

let mut parser: Box<dyn harper_core::parsers::Parser> =
if let Some("md") = file.extension().map(|v| v.to_str().unwrap()) {
Box::new(Markdown)
} else if let Some("typ") = file.extension().map(|v| v.to_str().unwrap()) {
Box::new(Typst)
} else {
Box::new(
match file.extension().map(|v| v.to_str().unwrap()) {
Some("md") => Box::new(Markdown),
Some("typ") => Box::new(Typst),
_ => Box::new(
CommentParser::new_from_filename(file)
.map(Box::new)
.ok_or(format_err!("Could not detect language ID."))?,
)
),
};

Ok((Document::new_curated(&source, &mut parser), source))
Expand Down
63 changes: 38 additions & 25 deletions harper-core/src/parsers/typst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,11 +489,11 @@ mod tests {
assert!(matches!(
token_kinds.as_slice(),
&[
TokenKind::Word(_), // identifier
TokenKind::Word(_), // key 1
TokenKind::Word(_), // value 1
TokenKind::Word(_), // key 2
TokenKind::Number(OrderedFloat(2019.0), None), // value 2
TokenKind::Word(_), // Identifier
TokenKind::Word(_), // Key 1
TokenKind::Word(_), // Value 1
TokenKind::Word(_), // Key 2
TokenKind::Number(OrderedFloat(2019.0), None), // Value 2
]
))
}
Expand All @@ -507,21 +507,21 @@ mod tests {
assert!(matches!(
&token_kinds.as_slice(),
&[
TokenKind::Word(_), // identifier
TokenKind::Word(_), // Identifier
TokenKind::Word(_), // This
TokenKind::Space(1),
TokenKind::Word(_), // is
TokenKind::Word(_), // Is
TokenKind::Space(1),
TokenKind::Word(_), // a
TokenKind::Word(_), // A
TokenKind::Space(1),
TokenKind::Word(_), // string
TokenKind::Word(_), // String
]
))
}

#[test]
fn sentence() {
let source = "This is a sentence, it does not have any particularly interesting elements of the typst syntax.";
let source = "This is a sentence, it is not interesting.";

let tokens = Typst.parse_str(source);
let token_kinds = tokens.iter().map(|t| t.kind).collect_vec();
Expand All @@ -546,24 +546,37 @@ mod tests {
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Punctuation(Punctuation::Period),
]
))
}

#[test]
fn smart_apostrophe_newline() {
let source = r#"group’s
writing"#;

let tokens = Typst.parse_str(source);
let token_kinds = tokens.iter().map(|t| t.kind).collect_vec();
dbg!(&token_kinds);

let charslice = source.chars().collect_vec();
assert_eq!(tokens[2].span.get_content_string(&charslice), "writing");

assert!(matches!(
token_kinds.as_slice(),
&[
TokenKind::Word(WordMetadata {
noun: Some(NounData {
is_possessive: Some(true),
..
}),
..
}),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Space(1),
TokenKind::Word(_),
TokenKind::Punctuation(Punctuation::Period),
]
))
));
}
}

0 comments on commit 5336778

Please sign in to comment.