Skip to content

Commit

Permalink
nlprule-build: make sure the cursor is not seek'd all the way to the end (#39)
Browse files Browse the repository at this point in the history
* make sure cursor is not at the end already

Since the transform data is short-lived, the input data can be bound by 'r
as well.

* add another missing seek back to 0

* ensure test data is decoded successfully
  • Loading branch information
drahnr authored Feb 20, 2021
1 parent aa4fa35 commit 28c9358
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
1 change: 1 addition & 0 deletions build/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ fs-err = "2.5"
tempdir = "0.3"
smush = "0.1.5"
env_logger = "0.8"
nlprule_030 = { package = "nlprule", version = "0.3.0" }
32 changes: 17 additions & 15 deletions build/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ pub type Result<T> = std::result::Result<T, Error>;

/// Definition of the data transformation for the network retrieved, binencoded rules and tokenizer datasets.
pub trait TransformDataFn:
for<'w> Fn(Box<dyn Read>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
for<'w, 'r> Fn(Box<dyn Read + 'r>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
{
}

impl<T> TransformDataFn for T where
T: for<'w> Fn(Box<dyn Read>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
T: for<'w, 'r> Fn(Box<dyn Read + 'r>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
{
}

Expand Down Expand Up @@ -155,6 +155,7 @@ fn obtain_binary_cache_or_github(
let mut intermediate = Box::new(Cursor::new(Vec::<u8>::new()));
transform_data_fn(Box::new(reader_binenc), Box::new(&mut intermediate))
.map_err(Error::TransformError)?;
intermediate.seek(SeekFrom::Start(0_u64))?;
intermediate
} else {
Box::new(reader_binenc)
Expand Down Expand Up @@ -355,8 +356,8 @@ impl BinaryBuilder {
.open(&tokenizer_out)?,
);
if let Some(ref transform_data_fn) = self.transform_data_fn {
let mut transfer_buffer_rules = Cursor::new(Vec::new());
let mut transfer_buffer_tokenizer = Cursor::new(Vec::new());
let mut transfer_buffer_rules = Vec::new();
let mut transfer_buffer_tokenizer = Vec::new();

compile::compile(
build_dir,
Expand All @@ -365,10 +366,13 @@ impl BinaryBuilder {
)
.map_err(Error::CollationFailed)?;

transform_data_fn(Box::new(transfer_buffer_rules), Box::new(rules_sink))
assert_ne!(transfer_buffer_rules.len(), 0);
assert_ne!(transfer_buffer_tokenizer.len(), 0);

transform_data_fn(Box::new(&mut transfer_buffer_rules.as_slice()), Box::new(rules_sink))
.map_err(Error::TransformError)?;
transform_data_fn(
Box::new(transfer_buffer_tokenizer),
Box::new(&mut transfer_buffer_tokenizer.as_slice()),
Box::new(tokenizer_sink),
)
.map_err(Error::TransformError)?;
Expand Down Expand Up @@ -652,7 +656,9 @@ mod tests {
.join(Path::new(&tokenizer_filename("en")))
.with_extension("bin.gz");
assert!(tokenizer_path.exists());
smush::decode(&fs::read(tokenizer_path)?, smush::Codec::Gzip).unwrap();
let decoded = smush::decode(&fs::read(tokenizer_path)?, smush::Codec::Gzip).unwrap();

let _ = nlprule_030::Tokenizer::new_from(&mut decoded.as_slice()).unwrap();

Ok(())
}
Expand Down Expand Up @@ -700,6 +706,8 @@ mod tests {
let mut decoded = Vec::new();
decoder.read_to_end(&mut decoded).unwrap();

let _ = nlprule_030::Rules::new_from(&mut decoded.as_slice()).unwrap();

Ok(())
}

Expand Down Expand Up @@ -763,15 +771,9 @@ mod tests {
let rules_path = tempdir
.join(Path::new(&rules_filename("en")))
.with_extension("bin");
assert!(rules_path.exists());
assert!(rules_path.is_file());

// The following will always fail since the versions will mismatch and rebuilding does not make sense
// `get_build_dir` is tested separately
//
// ```rust,no_run
// let _ = nlprule::Rules::new(rules_path)
// .map_err(|e| Error::ValidationFailed("en".to_owned(), Binary::Rules, e))?;
// ```
let _ = nlprule_030::Rules::new(rules_path).unwrap();
Ok(())
}
}

0 comments on commit 28c9358

Please sign in to comment.