Skip to content

Commit

Permalink
Merge pull request #38 from allo-media/master
Browse files Browse the repository at this point in the history
Dutch support.
  • Loading branch information
rtxm authored Jul 11, 2024
2 parents c41fddf + f4c22c6 commit a7b8889
Show file tree
Hide file tree
Showing 9 changed files with 561 additions and 6 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
[package]
name = "text2num"
version = "2.3.0"
version = "2.4.0"
authors = ["Allo-Media <contact@allo-media.fr>"]
edition = "2021"
license = "MIT"
description = "Parse and convert numbers written in English, Spanish, German, Italian or French into their digit representation."
description = "Parse and convert numbers written in English, Dutch, Spanish, German, Italian or French into their digit representation."
keywords = ["NLP", "words-to-numbers"]
categories = ["text-processing"]
repository = "https://github.com/allo-media/text2num-rs"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Parse and convert numbers written in English, Spanish or French into their digit representation.
# Parse and convert numbers written in English, Dutch, Spanish, German, Italian or French into their digit representation.

This crate provides a library for recognizing, parsing and transcribing into digits (base 10) numbers expressed in natural language.

Expand Down
1 change: 1 addition & 0 deletions src/digit_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ impl DigitString {
self.frozen = false;
self.marker = MorphologicalMarker::None;
self.buffer.clear();
self.flags = 0;
}

/// Freeze the DigitString to signal the number is complete.
Expand Down
3 changes: 2 additions & 1 deletion src/lang/fr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ impl LangInterpretor for French {
}
"mille" | "mil" | "millième" if b.is_range_free(3, 5) => {
let peek = b.peek(2);
if peek == b"1" || peek == b"01" {
if peek == b"1" {
Err(Error::Overlap)
} else {
b.shift(3)
Expand Down Expand Up @@ -317,6 +317,7 @@ mod tests {
assert_text2digits!("quinze", "15");

assert_text2digits!("soixante quinze mille", "75000");
assert_text2digits!("cent un mille", "101000");
assert_text2digits!("un milliard vingt-cinq millions", "1025000000");
}

Expand Down
1 change: 0 additions & 1 deletion src/lang/it/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,6 @@ mod tests {
fn test_basic() {
assert_text2digits!("due", "2");
assert_text2digits!("dieci", "10");
assert_text2digits!("dieci", "10");
assert_text2digits!("tredici", "13");
assert_text2digits!("diciassette", "17");
assert_text2digits!("venti", "20");
Expand Down
9 changes: 8 additions & 1 deletion src/lang/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ mod en;
mod es;
mod fr;
mod it;
mod nl;

use crate::digit_string::DigitString;

Expand All @@ -37,6 +38,7 @@ pub use en::English;
pub use es::Spanish;
pub use fr::French;
pub use it::Italian;
pub use nl::Dutch;

/// Model the Morphological markers that differentiate ordinals or fractions from cardinals,
/// and that must be retained on the digit form.
Expand Down Expand Up @@ -128,6 +130,7 @@ pub enum Language {
German(German),
Italian(Italian),
Spanish(Spanish),
Dutch(Dutch),
}

impl Language {
Expand All @@ -150,6 +153,10 @@ impl Language {
pub fn spanish() -> Self {
Language::Spanish(Spanish::default())
}

pub fn dutch() -> Self {
Language::Dutch(Dutch::default())
}
}

macro_rules! delegate {
Expand Down Expand Up @@ -219,5 +226,5 @@ macro_rules! delegate {
}

impl LangInterpretor for Language {
delegate!(French, English, German, Italian, Spanish);
delegate!(Dutch, French, English, German, Italian, Spanish);
}
Loading

0 comments on commit a7b8889

Please sign in to comment.