From 5ec21bdde5541fed6c6264a438dc8a8df9f30664 Mon Sep 17 00:00:00 2001 From: Michal Siedlaczek Date: Fri, 4 Mar 2022 11:13:38 -0500 Subject: [PATCH] Test lexicons in integration tests --- tests/toy.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/toy.rs b/tests/toy.rs index 0727693..ede9ad1 100644 --- a/tests/toy.rs +++ b/tests/toy.rs @@ -1,4 +1,4 @@ -use ciff::{ciff_to_pisa, pisa_to_ciff}; +use ciff::{ciff_to_pisa, pisa_to_ciff, PayloadSlice}; use std::fs::read; use std::path::PathBuf; use tempfile::TempDir; @@ -9,13 +9,19 @@ fn test_toy_index() -> anyhow::Result<()> { let input_path = PathBuf::from("tests/test_data/toy-complete-20200309.ciff"); let temp = TempDir::new().unwrap(); let output_path = temp.path().join("coll"); - if let Err(err) = ciff_to_pisa(&input_path, &output_path, false) { + if let Err(err) = ciff_to_pisa(&input_path, &output_path, true) { panic!("{}", err); } assert_eq!( std::fs::read_to_string(temp.path().join("coll.documents"))?, "WSJ_1\nTREC_DOC_1\nDOC222\n" ); + let bytes = std::fs::read(temp.path().join("coll.doclex"))?; + let actual_titles: Vec<_> = PayloadSlice::new(&bytes).iter().collect(); + assert_eq!( + actual_titles, + vec![b"WSJ_1".as_ref(), b"TREC_DOC_1", b"DOC222"], + ); assert_eq!( std::fs::read(temp.path().join("coll.sizes"))?, vec![3, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0] @@ -26,6 +32,22 @@ fn test_toy_index() -> anyhow::Result<()> { .collect::<Vec<_>>(), vec!["01", "03", "30", "content", "enough", "head", "simpl", "text", "veri"] ); + let bytes = std::fs::read(temp.path().join("coll.termlex"))?; + let actual_terms: Vec<_> = PayloadSlice::new(&bytes).iter().collect(); + assert_eq!( + actual_terms, + vec![ + b"01".as_ref(), + b"03", + b"30", + b"content", + b"enough", + b"head", + b"simpl", + b"text", + b"veri" + ] + ); assert_eq!( std::fs::read(temp.path().join("coll.docs"))?, vec![