Skip to content

Commit

Permalink
add console progress bar; add signal into parsers; refactor reader li…
Browse files Browse the repository at this point in the history
…ne: use read_until instead of iterators (lines, split)
  • Loading branch information
7phs committed Dec 29, 2017
1 parent d55341a commit ade5064
Show file tree
Hide file tree
Showing 15 changed files with 535 additions and 90 deletions.
101 changes: 101 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ version = "0.1.0"
authors = ["alexey <piyanin@gmail.com>"]

[dependencies]
rand = "0.3"
clap = "2.28.0"
indicatif = "0.8.0"
rand = "0.3"
diesel = { version = "1.0.0-beta1", features = ["sqlite", "postgres", "mysql"] }
diesel_infer_schema = { version = "1.0.0-beta1", features = ["sqlite", "postgres", "mysql"] }
diesel_migrations = "1.0.0-beta1"
4 changes: 3 additions & 1 deletion src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ impl<'a> Default for Args<'a> {
.about("Convert a word vector file (fasttext, glove, word2vec, gensim) to DB")
.arg(Arg::with_name("db_uri")
.short("db")
.long("db")
.takes_value(true)
.default_value("wordvector.db")
.help("path to result database"));
Expand Down Expand Up @@ -51,7 +52,7 @@ impl<'a> Default for Args<'a> {
},
Argument {
name: "word2vec/gensim",
short: "gm",
short: "t",
long: "gensim",
help: "word2vec word vector text file with gensim format (*.tsv)",
file_path: VectorFile::Gensim,
Expand Down Expand Up @@ -108,6 +109,7 @@ impl<'a> Args<'a> {

/// Prints the command-line help of the wrapped application, followed by an empty line.
///
/// If writing the help text fails, the error is reported on stderr instead of being
/// silently discarded; the trailing newline is still emitted.
pub fn print_help(&mut self) {
    if let Err(err) = self.app.print_help() {
        eprintln!("failed to print help: {}", err);
    }
    // The help output does not end with a newline, so terminate it explicitly.
    println!();
}

pub fn file_path(&self) -> Option<&[VectorFile]> {
Expand Down
12 changes: 5 additions & 7 deletions src/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@ impl Converter {
run_migrations(&self.connection)
}

pub fn convert(&self, data_iterator: &mut [DataIterator]) -> Result<(), String> {
for data in data_iterator {
if let Some(kind) = create_kind(&self.connection, data.kind()) {
for record in data.iter() {
if let Some(word) = create_word(&self.connection, &record.word) {
add_vectors(&self.connection, &Vector::from_vec(&word, &kind, &record.vec));
}
pub fn convert(&self, data_iterator: &mut DataIterator) -> Result<(), String> {
if let Some(kind) = create_kind(&self.connection, data_iterator.kind()) {
for record in data_iterator.iter() {
if let Some(word) = create_word(&self.connection, &record.word) {
add_vectors(&self.connection, &Vector::from_vec(&word, &kind, &record.vec));
}
}
}
Expand Down
45 changes: 33 additions & 12 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![feature(integer_atomics)]
#![feature(rand)]
#![feature(test)]
#![feature(universal_impl_trait)]
Expand All @@ -10,36 +11,56 @@ extern crate diesel_infer_schema;
extern crate diesel_migrations;

extern crate clap;
extern crate indicatif;
extern crate test;

#[cfg(test)]
extern crate rand;

mod args;
mod converter;
mod data;
mod db;
mod progressbar;
mod wordvector;

#[cfg(feature = "dumb")]
mod data;

use std::rc::Rc;
use args::Args;
use converter::Converter;
use progressbar::Progress;
use wordvector::dataiterator::DataIterator;
use wordvector::VectorFile;

/// Drives a full conversion run for the given word vector files.
///
/// Prepares the converter, creates a shared progress reporter, builds the data iterators
/// for `vector_files` and feeds each of them to `converter.convert`. A failed conversion
/// is reported on stderr and the remaining iterators are still processed.
fn convert_process(converter: Converter, vector_files: &[VectorFile]) {
    // NOTE(review): whatever `prepare` returns is discarded here; its type is not visible
    // from this file, so it is left untouched — confirm whether a failure should abort.
    converter.prepare();

    // The progress reporter is reference-counted because the iterators get their own handle.
    let progress_signal = Rc::new(Progress::start());
    let data_iterators = DataIterator::make_vec(Rc::clone(&progress_signal), vector_files);

    progress_signal.init(data_iterators.len() as u64);

    for mut data_iter in data_iterators {
        // NOTE(review): 100 is presumably the progress bar length (percent) — confirm.
        progress_signal.start(data_iter.kind(), 100);

        // `convert` returns `Result<(), String>`; surface the error instead of dropping it.
        if let Err(err) = converter.convert(&mut data_iter) {
            eprintln!("failed to convert a word vector file: {}", err);
        }
    }
}

fn main() {
#[cfg(feature = "dumb")]
data::test("test.data");

let mut arg = args::Args::default();
let mut arg = Args::default();

if arg.is_incomplete() {
arg.print_help();
} else {
match converter::Converter::new(arg.database_uri().unwrap()) {
Ok(converter) => {
converter.prepare();

let mut data_iter = wordvector::dataiterator::DataIterator::make_vec(arg.file_path().unwrap());
converter.convert(&mut data_iter);
}
Err(err) => {
println!("failed to initialise a converter with {:?}", err);
}
match Converter::new(arg.database_uri().unwrap()) {
Ok(converter) => convert_process(converter, arg.file_path().unwrap()),
Err(err) => println!("failed to initialise a converter with {:?}", err),
}
}
}
Expand Down
Loading

0 comments on commit ade5064

Please sign in to comment.