Skip to content

Implement character table control and codepage 437 option #195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/colors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use owo_colors::{colors, Color};

/// Foreground escape sequence (bright black) used for bytes in the `Null` category.
pub const COLOR_NULL: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
/// Foreground escape sequence (bright black); presumably applied to the offset
/// column — its use site is not visible here, confirm against the printer code.
pub const COLOR_OFFSET: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
/// Foreground escape sequence (cyan) used for bytes in the `AsciiPrintable` category.
pub const COLOR_ASCII_PRINTABLE: &[u8] = colors::Cyan::ANSI_FG.as_bytes();
/// Foreground escape sequence (green) for ASCII whitespace bytes.
/// Note: this is the same colour as `COLOR_ASCII_OTHER`.
pub const COLOR_ASCII_WHITESPACE: &[u8] = colors::Green::ANSI_FG.as_bytes();
/// Foreground escape sequence (green) for the remaining ASCII bytes.
/// Note: this is the same colour as `COLOR_ASCII_WHITESPACE`.
pub const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes();
/// Foreground escape sequence (yellow) for non-ASCII bytes.
pub const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes();
/// Escape sequence selecting the terminal's default foreground colour; written
/// after coloured output to undo the colours above.
pub const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes();

/// Lookup table from a byte value (used as the index, `0x00..=0xFF`) to the
/// character shown for it when the code page 437 character table is selected.
///
/// The table is laid out as 16 rows of 16 entries, so the row is the high
/// nibble and the column is the low nibble of the byte.
///
/// Deviations from the plain CP437 mapping, as noted in the comments below:
/// * `0x00` is shown as `⋄` instead of `␀`;
/// * `0x01..=0x1F` and `0x7F` use the graphic glyphs rather than control codes;
/// * `0xFF` is shown as U+FB00 (LATIN SMALL LIGATURE FF). It is written as a
///   `\u{..}` escape on purpose: if the ligature is ever decomposed into the
///   two letters "ff" by a text tool, the result is not a valid `char` literal.
#[rustfmt::skip]
pub const CP437: [char; 256] = [
    // Copyright (c) 2016, Delan Azabani <delan@azabani.com>
    //
    // Permission to use, copy, modify, and/or distribute this software for any
    // purpose with or without fee is hereby granted, provided that the above
    // copyright notice and this permission notice appear in all copies.
    //
    // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    //
    // modified to use the ⋄ character instead of ␀

    // use https://en.wikipedia.org/w/index.php?title=Code_page_437&oldid=978947122
    // not ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT
    // because we want the graphic versions of 01h–1Fh + 7Fh
    '⋄','☺','☻','♥','♦','♣','♠','•','◘','○','◙','♂','♀','♪','♫','☼',
    '►','◄','↕','‼','¶','§','▬','↨','↑','↓','→','←','∟','↔','▲','▼',
    ' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
    '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
    '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
    'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
    '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
    'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','⌂',
    'Ç','ü','é','â','ä','à','å','ç','ê','ë','è','ï','î','ì','Ä','Å',
    'É','æ','Æ','ô','ö','ò','û','ù','ÿ','Ö','Ü','¢','£','¥','₧','ƒ',
    'á','í','ó','ú','ñ','Ñ','ª','º','¿','⌐','¬','½','¼','¡','«','»',
    '░','▒','▓','│','┤','╡','╢','╖','╕','╣','║','╗','╝','╜','╛','┐',
    '└','┴','┬','├','─','┼','╞','╟','╚','╔','╩','╦','╠','═','╬','╧',
    '╨','╤','╥','╙','╘','╒','╓','╫','╪','┘','┌','█','▄','▌','▐','▀',
    'α','ß','Γ','π','Σ','σ','µ','τ','Φ','Θ','Ω','δ','∞','φ','ε','∩',
    '≡','±','≥','≤','⌠','⌡','÷','≈','°','∙','·','√','ⁿ','²','■','\u{FB00}',
];
61 changes: 38 additions & 23 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,18 @@
pub(crate) mod colors;
pub(crate) mod input;

pub use colors::*;
pub use input::*;

use std::io::{self, BufReader, Read, Write};

use owo_colors::{colors, Color};

/// Numeric base in which byte values are rendered.
///
/// Derives `Copy`/`Clone` like the other field-less option enums in this
/// module, so a value can be stored and still used by the caller.
#[derive(Copy, Clone)]
pub enum Base {
    /// Base 2.
    Binary,
    /// Base 8.
    Octal,
    /// Base 10.
    Decimal,
    /// Base 16.
    Hexadecimal,
}

const COLOR_NULL: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
const COLOR_OFFSET: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
const COLOR_ASCII_PRINTABLE: &[u8] = colors::Cyan::ANSI_FG.as_bytes();
const COLOR_ASCII_WHITESPACE: &[u8] = colors::Green::ANSI_FG.as_bytes();
const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes();
const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes();
const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes();

#[derive(Copy, Clone)]
pub enum ByteCategory {
Null,
Expand All @@ -30,6 +22,13 @@ pub enum ByteCategory {
NonAscii,
}

/// Selects how a byte is turned into the character shown for it.
///
/// Marked `#[non_exhaustive]`, so code outside this crate that matches on it
/// needs a wildcard arm.
#[derive(Copy, Clone)]
#[non_exhaustive]
pub enum CharacterTable {
/// Printable ASCII bytes are shown as themselves; every other byte is shown
/// as a placeholder glyph chosen by its byte category.
AsciiOnly,
/// Every byte is shown as the entry at its index in the `CP437` table.
CP437,
}

#[derive(Copy, Clone)]
pub enum Endianness {
Little,
Expand Down Expand Up @@ -64,7 +63,6 @@ impl Byte {

fn color(self) -> &'static [u8] {
use crate::ByteCategory::*;

match self.category() {
Null => COLOR_NULL,
AsciiPrintable => COLOR_ASCII_PRINTABLE,
Expand All @@ -74,16 +72,18 @@ impl Byte {
}
}

fn as_char(self) -> char {
fn as_char(self, character_table: CharacterTable) -> char {
use crate::ByteCategory::*;

match self.category() {
Null => '⋄',
AsciiPrintable => self.0 as char,
AsciiWhitespace if self.0 == 0x20 => ' ',
AsciiWhitespace => '_',
AsciiOther => '•',
NonAscii => '×',
match character_table {
CharacterTable::AsciiOnly => match self.category() {
Null => '⋄',
AsciiPrintable => self.0 as char,
AsciiWhitespace if self.0 == 0x20 => ' ',
AsciiWhitespace => '_',
AsciiOther => '•',
NonAscii => '×',
},
CharacterTable::CP437 => CP437[self.0 as usize],
}
}
}
Expand Down Expand Up @@ -167,6 +167,7 @@ pub struct PrinterBuilder<'a, Writer: Write> {
group_size: u8,
base: Base,
endianness: Endianness,
character_table: CharacterTable,
}

impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
Expand All @@ -182,6 +183,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
group_size: 1,
base: Base::Hexadecimal,
endianness: Endianness::Big,
character_table: CharacterTable::AsciiOnly,
}
}

Expand Down Expand Up @@ -230,6 +232,11 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
self
}

/// Chooses the character table the built printer will use, consuming the
/// builder and handing it back so further setters can be chained.
pub fn character_table(self, character_table: CharacterTable) -> Self {
    let mut builder = self;
    builder.character_table = character_table;
    builder
}

pub fn build(self) -> Printer<'a, Writer> {
Printer::new(
self.writer,
Expand All @@ -242,6 +249,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
self.group_size,
self.base,
self.endianness,
self.character_table,
)
}
}
Expand Down Expand Up @@ -271,6 +279,8 @@ pub struct Printer<'a, Writer: Write> {
base_digits: u8,
/// Whether to show groups in little or big endian format.
endianness: Endianness,
/// The character table to reference for the character panel.
character_table: CharacterTable,
}

impl<'a, Writer: Write> Printer<'a, Writer> {
Expand All @@ -285,6 +295,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
group_size: u8,
base: Base,
endianness: Endianness,
character_table: CharacterTable,
) -> Printer<'a, Writer> {
Printer {
idx: 0,
Expand All @@ -304,7 +315,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
})
.collect(),
byte_char_panel: (0u8..=u8::MAX)
.map(|i| format!("{}", Byte(i).as_char()))
.map(|i| format!("{}", Byte(i).as_char(character_table)))
.collect(),
byte_hex_panel_g: (0u8..=u8::MAX).map(|i| format!("{i:02x}")).collect(),
squeezer: if use_squeeze {
Expand All @@ -323,6 +334,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
Base::Hexadecimal => 2,
},
endianness,
character_table,
}
}

Expand Down Expand Up @@ -401,8 +413,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
if self.show_position_panel {
match self.squeezer {
Squeezer::Print => {
self.writer
.write_all(self.byte_char_panel[b'*' as usize].as_bytes())?;
self.writer.write_all(&[b'*'])?;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please explain this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a leftover bug from my initial design. Originally, the colored characters were pre-colored and cached ahead of time. I changed the way that coloring works in #176 to write prefix color codes separately from the characters themselves. The caching is now unnecessary here.

if self.show_color {
self.writer.write_all(COLOR_RESET)?;
}
Expand Down Expand Up @@ -732,6 +743,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);

printer.print_all(input).unwrap();
Expand Down Expand Up @@ -787,6 +799,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);
printer.display_offset(0xdeadbeef);

Expand Down Expand Up @@ -821,6 +834,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);

printer.print_all(input).unwrap();
Expand Down Expand Up @@ -881,6 +895,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);

printer.print_all(input).unwrap();
Expand Down
26 changes: 25 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use thiserror::Error as ThisError;

use terminal_size::terminal_size;

use hexyl::{Base, BorderStyle, Endianness, Input, PrinterBuilder};
use hexyl::{Base, BorderStyle, CharacterTable, Endianness, Input, PrinterBuilder};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -212,6 +212,18 @@ fn run() -> Result<()> {
.hide(true)
.help("An alias for '--endianness=little'."),
)
.arg(
Arg::new("character-table")
.long("character-table")
.value_name("FORMAT")
.value_parser(["codepage-437", "ascii-only"])
.default_value("ascii-only")
.help(
"The character table that should be used. 'ascii-only' \
will show dots for non-ASCII characters, 'codepage-437' \
will use Code page 437 for those characters."
),
)
.arg(
Arg::new("base")
.short('b')
Expand Down Expand Up @@ -469,6 +481,17 @@ fn run() -> Result<()> {
("big", _) => Endianness::Big,
_ => unreachable!(),
};

let character_table = match matches
.get_one::<String>("character-table")
.unwrap()
.as_ref()
{
"ascii-only" => CharacterTable::AsciiOnly,
"codepage-437" => CharacterTable::CP437,
_ => unreachable!(),
};

let stdout = io::stdout();
let mut stdout_lock = BufWriter::new(stdout.lock());

Expand All @@ -482,6 +505,7 @@ fn run() -> Result<()> {
.group_size(group_size)
.with_base(base)
.endianness(endianness)
.character_table(character_table)
.build();
printer.display_offset(skip_offset + display_offset);
printer.print_all(&mut reader).map_err(|e| anyhow!(e))?;
Expand Down
59 changes: 59 additions & 0 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,3 +618,62 @@ mod base {
);
}
}

// Integration tests for the `--character-table` command-line option.
mod character_table {
use super::hexyl;
use super::PrettyAssert;

// Runs the command produced by `hexyl()` on the `hello_world_elf64` fixture
// with colours disabled and `--character-table=codepage-437`, expects a
// successful exit, and checks stdout against the dump below.
// In the expected character panel, bytes such as 0x7f, 0x01 and 0xe8 show up
// as `⌂`, `☺` and `Φ`.
// NOTE(review): `hexyl()` and `pretty_stdout` are defined outside this module;
// presumably `pretty_stdout` compares the whole of stdout — confirm there.
#[test]
fn codepage_437() {
hexyl()
.arg("hello_world_elf64")
.arg("--color=never")
.arg("--character-table=codepage-437")
.assert()
.success()
.pretty_stdout(
"┌────────┬─────────────────────────┬─────────────────────────┬────────┬────────┐
│00000000│ 7f 45 4c 46 02 01 01 00 ┊ 00 00 00 00 00 00 00 00 │⌂ELF☻☺☺⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00000010│ 02 00 3e 00 01 00 00 00 ┊ 00 10 40 00 00 00 00 00 │☻⋄>⋄☺⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│
│00000020│ 40 00 00 00 00 00 00 00 ┊ 28 20 00 00 00 00 00 00 │@⋄⋄⋄⋄⋄⋄⋄┊( ⋄⋄⋄⋄⋄⋄│
│00000030│ 00 00 00 00 40 00 38 00 ┊ 03 00 40 00 04 00 03 00 │⋄⋄⋄⋄@⋄8⋄┊♥⋄@⋄♦⋄♥⋄│
│00000040│ 01 00 00 00 04 00 00 00 ┊ 00 00 00 00 00 00 00 00 │☺⋄⋄⋄♦⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00000050│ 00 00 40 00 00 00 00 00 ┊ 00 00 40 00 00 00 00 00 │⋄⋄@⋄⋄⋄⋄⋄┊⋄⋄@⋄⋄⋄⋄⋄│
│00000060│ e8 00 00 00 00 00 00 00 ┊ e8 00 00 00 00 00 00 00 │Φ⋄⋄⋄⋄⋄⋄⋄┊Φ⋄⋄⋄⋄⋄⋄⋄│
│00000070│ 00 10 00 00 00 00 00 00 ┊ 01 00 00 00 05 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄♣⋄⋄⋄│
│00000080│ 00 10 00 00 00 00 00 00 ┊ 00 10 40 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│
│00000090│ 00 10 40 00 00 00 00 00 ┊ 1d 00 00 00 00 00 00 00 │⋄►@⋄⋄⋄⋄⋄┊↔⋄⋄⋄⋄⋄⋄⋄│
│000000a0│ 1d 00 00 00 00 00 00 00 ┊ 00 10 00 00 00 00 00 00 │↔⋄⋄⋄⋄⋄⋄⋄┊⋄►⋄⋄⋄⋄⋄⋄│
│000000b0│ 01 00 00 00 06 00 00 00 ┊ 00 20 00 00 00 00 00 00 │☺⋄⋄⋄♠⋄⋄⋄┊⋄ ⋄⋄⋄⋄⋄⋄│
│000000c0│ 00 20 40 00 00 00 00 00 ┊ 00 20 40 00 00 00 00 00 │⋄ @⋄⋄⋄⋄⋄┊⋄ @⋄⋄⋄⋄⋄│
│000000d0│ 0e 00 00 00 00 00 00 00 ┊ 0e 00 00 00 00 00 00 00 │♫⋄⋄⋄⋄⋄⋄⋄┊♫⋄⋄⋄⋄⋄⋄⋄│
│000000e0│ 00 10 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│000000f0│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│* │ ┊ │ ┊ │
│00001000│ ba 0e 00 00 00 b9 00 20 ┊ 40 00 bb 01 00 00 00 b8 │║♫⋄⋄⋄╣⋄ ┊@⋄╗☺⋄⋄⋄╕│
│00001010│ 04 00 00 00 cd 80 b8 01 ┊ 00 00 00 cd 80 00 00 00 │♦⋄⋄⋄═Ç╕☺┊⋄⋄⋄═Ç⋄⋄⋄│
│00001020│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│* │ ┊ │ ┊ │
│00002000│ 48 65 6c 6c 6f 2c 20 77 ┊ 6f 72 6c 64 21 0a 00 2e │Hello, w┊orld!◙⋄.│
│00002010│ 73 68 73 74 72 74 61 62 ┊ 00 2e 74 65 78 74 00 2e │shstrtab┊⋄.text⋄.│
│00002020│ 64 61 74 61 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │data⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00002030│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│* │ ┊ │ ┊ │
│00002060│ 00 00 00 00 00 00 00 00 ┊ 0b 00 00 00 01 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊♂⋄⋄⋄☺⋄⋄⋄│
│00002070│ 06 00 00 00 00 00 00 00 ┊ 00 10 40 00 00 00 00 00 │♠⋄⋄⋄⋄⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│
│00002080│ 00 10 00 00 00 00 00 00 ┊ 1d 00 00 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊↔⋄⋄⋄⋄⋄⋄⋄│
│00002090│ 00 00 00 00 00 00 00 00 ┊ 10 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊►⋄⋄⋄⋄⋄⋄⋄│
│000020a0│ 00 00 00 00 00 00 00 00 ┊ 11 00 00 00 01 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊◄⋄⋄⋄☺⋄⋄⋄│
│000020b0│ 03 00 00 00 00 00 00 00 ┊ 00 20 40 00 00 00 00 00 │♥⋄⋄⋄⋄⋄⋄⋄┊⋄ @⋄⋄⋄⋄⋄│
│000020c0│ 00 20 00 00 00 00 00 00 ┊ 0e 00 00 00 00 00 00 00 │⋄ ⋄⋄⋄⋄⋄⋄┊♫⋄⋄⋄⋄⋄⋄⋄│
│000020d0│ 00 00 00 00 00 00 00 00 ┊ 04 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊♦⋄⋄⋄⋄⋄⋄⋄│
│000020e0│ 00 00 00 00 00 00 00 00 ┊ 01 00 00 00 03 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄♥⋄⋄⋄│
│000020f0│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00002100│ 0e 20 00 00 00 00 00 00 ┊ 17 00 00 00 00 00 00 00 │♫ ⋄⋄⋄⋄⋄⋄┊↨⋄⋄⋄⋄⋄⋄⋄│
│00002110│ 00 00 00 00 00 00 00 00 ┊ 01 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄⋄⋄⋄⋄│
│00002120│ 00 00 00 00 00 00 00 00 ┊ │⋄⋄⋄⋄⋄⋄⋄⋄┊ │
└────────┴─────────────────────────┴─────────────────────────┴────────┴────────┘
",
);
}
}