Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleaning data files now removes empty directories left behind #55

Merged
merged 1 commit into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 108 additions & 41 deletions src/commands/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::path::{Path, PathBuf};

use crate::new::normalize_path;
use crate::options::{KerblamTomlOptions, RemoteFile};
use crate::utils::{ask_for, run_command, YesNo};
use crate::utils::{ask_for, find_dirs, run_command, YesNo};

use anyhow::{anyhow, bail, Result};
use indicatif::{ProgressBar, ProgressFinish, ProgressStyle};
Expand Down Expand Up @@ -324,14 +324,51 @@ pub fn fetch_remote_data(config: KerblamTomlOptions) -> Result<()> {
}
}

pub fn clean_data(config: KerblamTomlOptions, keep_remote: bool) -> Result<()> {
fn delete_files(files: Vec<PathBuf>) -> Result<()> {
let progress = ProgressBar::new(files.len() as u64);

let mut failures: Vec<(PathBuf, std::io::Error)> = vec![];
for file in progress.wrap_iter(files.into_iter()) {
if file.metadata().unwrap().is_file() {
if let Err(e) = fs::remove_file(&file) {
failures.push((file, e));
}
} else {
if let Err(e) = fs::remove_dir(&file) {
failures.push((file, e))
}
}
}

if !failures.is_empty() {
bail!(
"Failed to clean some files:\n {}",
failures
.into_iter()
.map(|x| {
format!(
"\t- {}: {}\n",
normalize_path(x.0.strip_prefix(current_dir().unwrap()).unwrap())
.to_string_lossy(),
x.1.to_string()
)
})
.collect::<String>()
)
};

Ok(())
}

pub fn clean_data(config: KerblamTomlOptions, keep_remote: bool, keep_dirs: bool) -> Result<()> {
let cleanable_files = config.volatile_files();
let remote_files: Vec<PathBuf> = config
.remote_files()
.into_iter()
.map(|remote| remote.path)
.collect();

// Filter out the remote files if we were asked to keep them
let cleanable_files: Vec<PathBuf> = if keep_remote {
cleanable_files
.into_iter()
Expand All @@ -344,51 +381,81 @@ pub fn clean_data(config: KerblamTomlOptions, keep_remote: bool) -> Result<()> {
cleanable_files
};

if cleanable_files.is_empty() {
println!("✨ Nothing to clean!");
log::debug!("Files to clean: {:?}", cleanable_files);

if !cleanable_files.is_empty() {
let question = format!(
"🧹 About to delete {} files ({}). Continue?",
cleanable_files.len(),
unsafe_path_filesize_conversion(&cleanable_files)
.into_iter()
.sum::<FileSize>()
);

match ask_for::<YesNo>(question.as_str()) {
YesNo::Yes => delete_files(cleanable_files.clone())?,
YesNo::No => {
bail!("Aborted!");
}
};
}

// After we clean up the files, we can clean up the directories
if keep_dirs {
if cleanable_files.is_empty() {
println!("✨ Nothing to clean!")
}
return Ok(());
}

let question = format!(
"🧹 About to delete {} files ({}). Continue?",
cleanable_files.len(),
unsafe_path_filesize_conversion(&cleanable_files)
// A tiny utility to get rid of filter paths that overlap
fn remove_useless_filters(target: &Path, filters: Vec<PathBuf>) -> Vec<PathBuf> {
filters
.into_iter()
.sum::<FileSize>()
);

let progress = ProgressBar::new(cleanable_files.len() as u64);

match ask_for::<YesNo>(question.as_str()) {
YesNo::Yes => {
let mut failures: Vec<(PathBuf, std::io::Error)> = vec![];
for file in progress.wrap_iter(cleanable_files.into_iter()) {
if let Err(e) = fs::remove_file(file.clone()) {
failures.push((file.clone(), e));
}
}
.filter(|x| !target.starts_with(x))
.collect()
}

if !failures.is_empty() {
bail!(
"Failed to clean some files:\n {}",
failures
.into_iter()
.map(|x| {
format!(
"\t- {}: {}\n",
normalize_path(x.0.strip_prefix(current_dir().unwrap()).unwrap())
.to_string_lossy(),
x.1.to_string()
)
})
.collect::<String>()
)
};
}
YesNo::No => {
bail!("Aborted!");
let dirs = [
find_dirs(
config.output_data_dir(),
Some(remove_useless_filters(
config.output_data_dir().as_ref(),
vec![config.input_data_dir(), config.intermediate_data_dir()],
)),
),
find_dirs(
config.intermediate_data_dir(),
Some(remove_useless_filters(
config.intermediate_data_dir().as_ref(),
vec![config.input_data_dir(), config.output_data_dir()],
)),
),
]
.concat();

// Remove the root directories we DO NOT want to clean.
let mut dirs: Vec<PathBuf> = dirs
.into_iter()
.filter(|x| *x != config.output_data_dir() && *x != config.intermediate_data_dir())
.collect();

// We need to sort the dirs from deepest to shallowest in order to
// delete them in order, or else `delete_files` just dies.
dirs.sort_unstable_by_key(|i| i.ancestors().count());
dirs.reverse();
log::debug!("Dirs to clean: {:?}", dirs);

if !dirs.is_empty() {
println!("🧹 Removing empty directories left behind...");
// This dies if the directory is not empty. So it's generally safe
// even if some bug introduces an error here.
delete_files(dirs)?;
} else {
if cleanable_files.is_empty() {
println!("✨ Nothing to clean!")
}
};
}

Ok(())
}
Expand Down
10 changes: 7 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ enum DataCommands {
#[arg(long, short, action)]
/// Do not delete locally present remote files.
keep_remote: bool,
#[arg(long, short('d'), action)]
/// Do not delete locally present directories.
keep_dirs: bool,
},
// Pack local data for export to others
Pack {
Expand Down Expand Up @@ -135,9 +138,10 @@ fn main() -> anyhow::Result<()> {
println!("{}", data_info)
}
Some(DataCommands::Fetch) => data::fetch_remote_data(config.unwrap())?,
Some(DataCommands::Clean { keep_remote }) => {
data::clean_data(config.unwrap(), keep_remote)?
}
Some(DataCommands::Clean {
keep_remote,
keep_dirs,
}) => data::clean_data(config.unwrap(), keep_remote, keep_dirs)?,
Some(DataCommands::Pack { output_path: path }) => data::package_data_to_archive(
config.unwrap(),
path.unwrap_or(here.join("data/data_export.tar.gz")),
Expand Down
24 changes: 21 additions & 3 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::process::Command;
use std::str::FromStr;

use version_compare::Version;
use walkdir;
use walkdir::{self, DirEntry};

use crate::options::{KerblamTomlOptions, Pipe};
use crate::VERSION;
Expand Down Expand Up @@ -244,14 +244,24 @@ pub fn fetch_gitignore(name: &str) -> Result<String> {
Ok(response)
}

pub fn find_files(inspected_path: impl AsRef<Path>, filters: Option<Vec<PathBuf>>) -> Vec<PathBuf> {
fn find_path_items_with_filter(
inspected_path: impl AsRef<Path>,
top_level_filter: fn(&DirEntry) -> bool,
filters: Option<Vec<PathBuf>>,
) -> Vec<PathBuf> {
let inspected_path = inspected_path.as_ref();

if let Some(filters) = filters {
// The filters are here to get rid of items that *might* be included
// by accident, especially when finding data paths.
//
// For example, if we want all files in /data/out but we want to
// preserve the files in /data/, we can add the /data/ filter.
walkdir::WalkDir::new(inspected_path)
.into_iter()
.filter_map(|i| i.ok())
.filter(|x| {
// If filter returns true, we return this path
let mut p = true;
for path in filters.clone() {
if x.path().starts_with(path) {
Expand All @@ -260,7 +270,7 @@ pub fn find_files(inspected_path: impl AsRef<Path>, filters: Option<Vec<PathBuf>
}
p
})
.filter(|path| path.metadata().unwrap().is_file())
.filter(top_level_filter)
.map(|x| x.path().to_owned())
.collect()
} else {
Expand All @@ -273,6 +283,14 @@ pub fn find_files(inspected_path: impl AsRef<Path>, filters: Option<Vec<PathBuf>
}
}

/// Collect the regular files found while walking `inspected_path`.
///
/// `filters` is forwarded unchanged to `find_path_items_with_filter`.
pub fn find_files(inspected_path: impl AsRef<Path>, filters: Option<Vec<PathBuf>>) -> Vec<PathBuf> {
    // `DirEntry::file_type` cannot fail, unlike `metadata()`, so one
    // unreadable entry can no longer panic the whole walk.
    find_path_items_with_filter(inspected_path, |x| x.file_type().is_file(), filters)
}

/// Collect the directories found while walking `inspected_path`.
///
/// `filters` is forwarded unchanged to `find_path_items_with_filter`.
pub fn find_dirs(inspected_path: impl AsRef<Path>, filters: Option<Vec<PathBuf>>) -> Vec<PathBuf> {
    // `DirEntry::file_type` cannot fail, unlike `metadata()`, so one
    // unreadable entry can no longer panic the whole walk.
    find_path_items_with_filter(inspected_path, |x| x.file_type().is_dir(), filters)
}

pub fn warn_kerblam_version(config: &KerblamTomlOptions) -> () {
// TODO: is there a way to avoid this clone()? I feel like there should be
// but I'm not sure.
Expand Down
Loading