diff --git a/Cargo.lock b/Cargo.lock index ea2f22c..abec54b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,55 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "bitflags" version = "2.5.0" @@ -18,10 +67,57 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" name = "charsplit" version = "0.1.0" dependencies = [ + "clap", "colored", "prettytable-rs", ] +[[package]] +name = "clap" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + [[package]] name = "colored" version = "2.1.0" @@ -91,6 +187,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.3.9" @@ -108,6 +210,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + [[package]] name = "itoa" version = "1.0.11" @@ -217,6 +325,12 @@ dependencies = [ "syn", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.66" @@ -271,6 +385,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 74eb4c9..8b02926 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,5 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +clap = { version = "4.5.7", features = ["derive"] } colored = "2.1.0" prettytable-rs = "0.10.0" diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..445d273 --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,46 @@ +pub mod cli { + use std::fmt; + + use clap::{arg, command, Parser}; + use clap::builder::TypedValueParser as _; + #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] + pub enum Encoding { + UTF8, + } + + impl Encoding { + pub fn from_str(s: &str) -> Self { + match s { + "utf8" => Encoding::UTF8, + _ => Encoding::UTF8, + } + } + } + + impl fmt::Display for Encoding { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } + } + + #[derive(Parser, Debug)] + #[command(name = "charsplit")] + #[command(bin_name = "charsplit")] + #[command(about = "Split a string into its bytes and characters")] + #[command(long_about = "charsplit is a small utility tool that will give you information about your string input. It will split the string into its bytes and graphemes, and give you information about them.")] + #[command(version = "0.1.0")] + pub struct Arguments { + /// What encoding to use + #[arg( + short, + long, + default_value_t = Encoding::UTF8, + value_parser = clap::builder::PossibleValuesParser::new(["UTF8"]) + .map(|value| Encoding::from_str(&value)) + )] + pub encoding: Encoding, + + #[arg(default_value_t = String::new())] + pub text: String, + } +} diff --git a/src/main.rs b/src/main.rs index 3347e65..3d359ae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,88 +1,40 @@ -use core::fmt; -use std::{char, io::{self, Read}, iter::Iterator, slice::Iter}; -use colored::Colorize; +use std::{io::{self, IsTerminal, Read}, process::{exit, ExitCode}}; +use clap::Parser; +use cli::cli::Arguments; use config::config::Config; -use charset::charset::{Charset, Utf8Charset}; use prettytable::{format, row, Table}; +use utils::utils::format_grapheme; mod config; mod charset; mod utf8_groups; +mod utils; +mod cli; -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] -enum ByteType { - Ascii, - Utf8Base, - Utf8Continuation, - AsciiNewLine, - Unknown -} - -impl fmt::Display for ByteType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -fn get_byte_type(byte: &u8) -> ByteType { - if byte == &0x0A { - return ByteType::AsciiNewLine; - } - - if byte >> 7 == 0 { - return ByteType::Ascii; - } - - if byte >> 5 == 0b110 || byte >> 4 == 0b1110 || byte >> 3 == 0b11110 { - return ByteType::Utf8Base; - } - - if byte >> 6 == 0b10 { - return ByteType::Utf8Continuation; - } - - ByteType::Unknown -} - -fn format_grapheme(table: &mut Table, grapheme: &char, use_truecolors: &bool) { - let grapheme_string = grapheme.to_string(); - let bytes = grapheme_string.as_bytes(); - - let first_byte = bytes.get(0).unwrap(); - - table.add_row(row![ - grapheme, - format!("{:0b}", first_byte), - format!("{}", (*first_byte) as u32), - (*grapheme) as u32, - format!("0x{:X}", (*grapheme) as u32), - get_byte_type(&first_byte), - Utf8Charset::get_description(&((*grapheme) as u32)), - ]); +fn main() -> ExitCode { + let config = Config::build(); + let args = Arguments::parse(); + + let input = match args.text.as_str() { + "" => { + if io::stdin().is_terminal() { + String::new() + } else { + let mut input = String::new(); + io::stdin().read_to_string(&mut input).unwrap(); + input + } + } + _ => args.text + }; + dbg!(&input); - for byte in &bytes[1..] { - table.add_row(row![ - " ", - format!("{:08b}", byte), - format!("{}", (*byte) as u32), - " ", - " ", - get_byte_type(&byte), - " ", - ]); + if input.len() == 0 { + eprintln!("Please provide some input either via argument or stdin"); + return ExitCode::FAILURE; } -} - -fn main() { - let config = Config::build(); - - let input = { - let mut input = String::new(); - io::stdin().read_to_string(&mut input).unwrap(); - input - }; let graphemes = input.chars(); let mut table = Table::new(); @@ -106,4 +58,6 @@ fn main() { ]); table.printstd(); + + ExitCode::SUCCESS } diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..df0d2c2 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,73 @@ +pub mod utils { + use std::fmt; + + use prettytable::{row, Table}; + + use crate::charset::charset::{Charset, Utf8Charset}; + + #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] + pub enum ByteType { + Ascii, + Utf8Base, + Utf8Continuation, + AsciiNewLine, + Unknown + } + + impl fmt::Display for ByteType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } + } + + pub fn get_byte_type(byte: &u8) -> ByteType { + if byte == &0x0A { + return ByteType::AsciiNewLine; + } + + if byte >> 7 == 0 { + return ByteType::Ascii; + } + + if byte >> 5 == 0b110 || byte >> 4 == 0b1110 || byte >> 3 == 0b11110 { + return ByteType::Utf8Base; + } + + if byte >> 6 == 0b10 { + return ByteType::Utf8Continuation; + } + + ByteType::Unknown + } + + pub fn format_grapheme(table: &mut Table, grapheme: &char, use_truecolors: &bool) { + let grapheme_string = grapheme.to_string(); + let bytes = grapheme_string.as_bytes(); + + let first_byte = bytes.get(0).unwrap(); + + table.add_row(row![ + grapheme, + format!("{:0b}", first_byte), + format!("{}", (*first_byte) as u32), + (*grapheme) as u32, + format!("0x{:X}", (*grapheme) as u32), + get_byte_type(&first_byte), + Utf8Charset::get_description(&((*grapheme) as u32)), + ]); + + + for byte in &bytes[1..] { + table.add_row(row![ + " ", + format!("{:08b}", byte), + format!("{}", (*byte) as u32), + " ", + " ", + get_byte_type(&byte), + " ", + ]); + } + + } +}