From 5394be08a4d05bcd7eca7575da0c4f79f582d3c5 Mon Sep 17 00:00:00 2001 From: Thomas Huber Date: Mon, 26 Feb 2024 16:29:41 -0700 Subject: [PATCH] Refactored queue & added tests :zap: --- .github/workflows/release.yml | 34 ++++---- .gitignore | 2 + README.md | 24 ++---- build.zig | 25 +++--- build.zig.zon | 11 +++ src/decode.zig | 41 +++++---- src/encode.zig | 32 +++---- src/main.zig | 155 +++++++++++++++------------------- src/queue.zig | 123 +++++++++++++++------------ src/test.zig | 76 +++++++++++++++++ 10 files changed, 305 insertions(+), 218 deletions(-) create mode 100644 build.zig.zon create mode 100644 src/test.zig diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ff3d314..35bc7e1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -33,25 +33,25 @@ jobs: strategy: matrix: include: - - name: linux-x86_64 - target: x86_64-linux - strip: strip=true - optimize: optimize=ReleaseSafe + - name: linux-x86_64 + target: x86_64-linux + strip: strip=true + optimize: optimize=ReleaseSafe - - name: windows-x86_64 - target: x86_64-windows - strip: strip=true - optimize: optimize=ReleaseSafe + - name: windows-x86_64 + target: x86_64-windows + strip: strip=true + optimize: optimize=ReleaseSafe - - name: macos-aarch64 - target: aarch64-macos - strip: strip=true - optimize: optimize=ReleaseSafe + - name: macos-aarch64 + target: aarch64-macos + strip: strip=true + optimize: optimize=ReleaseSafe - - name: macos-x86 - target: x86_64-macos - strip: strip=true - optimize: optimize=ReleaseSafe + - name: macos-x86 + target: x86_64-macos + strip: strip=true + optimize: optimize=ReleaseSafe runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -81,4 +81,4 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - gh release create v0.5.0 -t "0.5.0" -n "This is the first release right here :zap:" entreepy/entreepy* + gh release create v0.5.1 -t "0.5.1" entreepy/entreepy* diff --git a/.gitignore b/.gitignore index 713ad2b..1bfb56e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ res/* !res/test.txt !res/nice.shakespeare.txt !res/a_midsummer_nights_dream.txt + +decoded_* diff --git a/README.md b/README.md index 04786c4..09e2533 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ entreepy
[![Actions Status](https://github.com/typio/entreepy/workflows/release/badge.svg)](https://github.com/typio/entreepy/actions) ==== -> ⚡ Huffman compression +> ⚡ Fast huffman coding text compression The name is from entropy coding + binary trees. @@ -15,7 +15,7 @@ Options: -h, --help show help -p, --print print decompressed text to stdout -t, --test test/dry run, does not write to file - -d, --debug print huffman code dictionary and performance times to stdout + -d, --debug print huffman code dictionary and performance times Commands: c compress a file @@ -29,32 +29,28 @@ Examples: entreepy -ptd d text.txt.et ``` -Input file must be < 1 terabyte. I recommend keeping an uncompressed backup or testing the program's -decompression before deleting the original, the program hasn't been robustly tested. Be sure to use -the same version of the program to decompress as compress. +Input file must be < 1 terabyte. I recommend keeping an uncompressed backup or testing the program's decompression before deleting the original, the program hasn't been robustly tested. Be sure to use the same version of the program to decompress as compress. ### Performance - - - + -I've developed a novel approach to decoding that utilizes a decode map. This map is keyed by the integer value of the code and stores a subarray of letters with matching code integer value - that is, the letters that correspond to codes with the same integer value - indexed by length minus one. For example, the map might include the following entries: +I use a decode map which is keyed by the integer value of the code and stores a subarray of letters with matching code integer value - that is, the letters that correspond to codes with the same integer value - indexed by length minus one. For example, the map might include the following entries: `{ 2: [_, a (10), e (010), ...], 13: [_, _, _, _, z (01101), ...] }.` -By utilizing this decode map, decoding can be performed much more quickly than by traversing a binary tree bit by bit. I haven't come across a faster decoding approach than this one. +By utilizing this decode map, decoding can be performed much more quickly than by traversing a binary tree. -#### Current Performance on MacBook Air M2, 8 GB RAM +#### Performance on MacBook Air M2, 8 GB RAM - v0.5.0 | File | Original File Size | Compressed Size | Compression Time | Decompression Time | | ---- | :----------------: | :-------------: | :--------------: | :----------------: | | [Macbeth, Act V, Scene V](https://github.com/typio/entreepy/blob/main/res/nice.shakespeare.txt) | 477 bytes | 374 bytes | 240μs | 950μs | | [A Midsummer Night's Dream](https://github.com/typio/entreepy/blob/main/res/a_midsummer_nights_dream.txt) | ~ 115 KB | ~ 66 KB | 2.2ms | 150ms | | [The Complete Works of Shakespeare](https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt) | ~ 5.5 MB | ~ 3.2 MB | 0.1s | 7s | -### Compressed File Format (tentative) +### Compressed File Format -Introduces the `.et` file format, identified by the magic number `e7 c0 de`. +Uses the `.et` file format, identified by the magic number `e7 c0 de`. ```bf | magic number -> 3 bytes | @@ -67,6 +63,4 @@ for n symbols | symbol code -> m bits | | packed big-endian bitstream of codes | starting on new byte - -| 0 padding -> <=3 bytes | ``` diff --git a/build.zig b/build.zig index 5475860..f154683 100644 --- a/build.zig +++ b/build.zig @@ -8,16 +8,10 @@ pub fn build(b: *std.Build) void { defer _ = gpa.deinit(); const allocator = gpa.allocator(); - const strip = b.option(bool, "strip", "") orelse false; + const os = @tagName(target.result.os.tag); + const arch = @tagName(target.result.cpu.arch); - // https://ziglang.org/documentation/master/std/src/target.zig.html - const os_table = [_][]const u8{ "freestanding", "ananas", "cloudabi", "dragonfly", "freebsd", "fuchsia", "ios", "kfreebsd", "linux", "lv2", "macos", "netbsd", "openbsd", "solaris", "windows", "zos", "haiku", "minix", "rtems", "nacl", "aix", "cuda", "nvcl", "amdhsa", "ps4", "ps5", "elfiamcu", "tvos", "watchos", "driverkit", "mesa3d", "contiki", "amdpal", "hermit", "hurd", "wasi", "emscripten", "shadermodel", "uefi", "opencl", "glsl450", "vulkan", "plan9", "other" }; - const arch_table = [_][]const u8{ "arm", "armeb", "aarch64", "aarch64_be", "aarch64_32", "arc", "avr", "bpfel", "bpfeb", "csky", "dxil", "hexagon", "loongarch32", "loongarch64", "m68k", "mips", "mipsel", "mips64", "mips64el", "msp430", "powerpc", "powerpcle", "powerpc64", "powerpc64le", "r600", "amdgcn", "riscv32", "riscv64", "sparc", "sparc64", "sparcel", "s390x", "tce", "tcele", "thumb", "thumbeb", "x86", "x86_64", "xcore", "nvptx", "nvptx64", "le32", "le64", "amdil", "amdil64", "hsail", "hsail64", "spir", "spir64", "spirv32", "spirv64", "kalimba", "shave", "lanai", "wasm32", "wasm64", "renderscript32", "renderscript64", "ve", "spu_2" }; - - var os = os_table[@enumToInt(target.getOsTag())]; - var arch = arch_table[@enumToInt(target.getCpuArch())]; - - var name = std.fmt.allocPrint(allocator, "entreepy-{s}-{s}", .{ os, arch }) catch "e"; + const name = std.fmt.allocPrint(allocator, "entreepy-{s}-{s}", .{ os, arch }) catch "e"; defer allocator.free(name); const exe = b.addExecutable(.{ @@ -27,11 +21,10 @@ pub fn build(b: *std.Build) void { .target = target, .optimize = optimize, }); - exe.strip = strip; - exe.install(); + b.installArtifact(exe); - const run_cmd = exe.run(); + const run_cmd = b.addRunArtifact(exe); run_cmd.step.dependOn(b.getInstallStep()); @@ -42,12 +35,14 @@ pub fn build(b: *std.Build) void { const run_step = b.step("run", "Run the app"); run_step.dependOn(&run_cmd.step); - const exe_tests = b.addTest(.{ - .root_source_file = .{ .path = "src/main.zig" }, + const unit_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/test.zig" }, .target = target, .optimize = optimize, }); + const run_unit_tests = b.addRunArtifact(unit_tests); + const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&exe_tests.step); + test_step.dependOn(&run_unit_tests.step); } diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..202fd4d --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,11 @@ +.{ + .name = "entreepy", + + .version = "0.5.1", + + .dependencies = .{}, + + .minimum_zig_version = "0.12.0", + + .paths = .{""}, +} diff --git a/src/decode.zig b/src/decode.zig index bd3d223..917db8f 100644 --- a/src/decode.zig +++ b/src/decode.zig @@ -2,15 +2,16 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -pub const DecodeFlags = packed struct { +pub const DecodeFlags = struct { write_output: bool = false, print_output: bool = false, debug: bool = false, - _padding: u30 = 0, }; -pub fn decode(allocator: Allocator, compressed_text: []const u8, out_writer: std.fs.File.Writer, -std_out: std.fs.File, flags: DecodeFlags) !void { +// TODO: Add checks for to error if it isnt in valid .et file format (min length) + +pub fn decode(allocator: Allocator, compressed_text: []const u8, out_writer: anytype, std_out: std.fs.File, flags: DecodeFlags) !usize { + var bytes_written: u32 = 0; const start_time = std.time.microTimestamp(); defer if (flags.debug) std_out.writer().print("\ntime taken: {d}μs\n", .{std.time.microTimestamp() - start_time}) catch {}; @@ -19,7 +20,9 @@ std_out: std.fs.File, flags: DecodeFlags) !void { var reading_dict_code_len: bool = false; var reading_dict_code: bool = false; - var decode_dictionary_length: usize = compressed_text[3] + 1; + const decode_dictionary_length: u8 = compressed_text[3] + 1; + + std.debug.print("decode_dictionary_length: {}\n", .{decode_dictionary_length}); var decode_body_length: u32 = compressed_text[4]; decode_body_length <<= 8; @@ -29,6 +32,8 @@ std_out: std.fs.File, flags: DecodeFlags) !void { decode_body_length <<= 8; decode_body_length |= compressed_text[7]; + std.debug.print("decode body length: {}\n", .{decode_body_length}); + var longest_code: u8 = 0; var shortest_code: usize = std.math.maxInt(usize); @@ -56,12 +61,12 @@ std_out: std.fs.File, flags: DecodeFlags) !void { while (i <= 7) { if (pos > 7) break :read; build_bits <<= 1; - build_bits |= (byte >> @truncate(u3, 7 - pos)) & 1; + build_bits |= (byte >> @as(u3, @truncate(7 - pos))) & 1; pos += 1; i += 1; } - current_letter = @truncate(u8, build_bits); + current_letter = @as(u8, @truncate(build_bits)); reading_dict_letter = false; reading_dict_code_len = true; @@ -74,12 +79,12 @@ std_out: std.fs.File, flags: DecodeFlags) !void { while (i <= 7) { if (pos > 7) break :read; build_bits <<= 1; - build_bits |= (byte >> @truncate(u3, 7 - pos)) & 1; + build_bits |= (byte >> @as(u3, @truncate(7 - pos))) & 1; pos += 1; i += 1; } - current_code_length = @truncate(u8, build_bits); + current_code_length = @as(u8, @truncate(build_bits)); if (current_code_length > longest_code) longest_code = current_code_length; if (current_code_length < shortest_code) shortest_code = current_code_length; @@ -95,7 +100,7 @@ std_out: std.fs.File, flags: DecodeFlags) !void { while (i < current_code_length) { if (pos > 7) break :read; build_bits <<= 1; - build_bits |= (byte >> @truncate(u3, 7 - pos)) & 1; + build_bits |= (byte >> @as(u3, @truncate(7 - pos))) & 1; pos += 1; i += 1; @@ -140,7 +145,7 @@ std_out: std.fs.File, flags: DecodeFlags) !void { decode_text: while (window_len >= longest_code) { // loop through all possible code lengths, checking start of window for match checking_code_len = shortest_code; - while (checking_code_len <= longest_code and window_len >= longest_code) { + while (window_len >= checking_code_len) { if (decoded_letters_read >= decode_body_length or window_len < checking_code_len) { @@ -148,21 +153,24 @@ std_out: std.fs.File, flags: DecodeFlags) !void { } testing_code = window & - ((@as(u32, 0b1) << @truncate(u5, checking_code_len)) - 1) << @truncate(u5, window_len - checking_code_len); + ((@as(u32, 0b1) << @as(u5, @truncate(checking_code_len))) - 1) << @as(u5, @truncate(window_len - checking_code_len)); - testing_code >>= @truncate(u6, window_len - checking_code_len); + testing_code >>= @as(u6, @truncate(window_len - checking_code_len)); if (decode_table.get(testing_code)) |entry| { if (entry[checking_code_len - 1] > 0) { - var c = entry[checking_code_len - 1]; + const c = entry[checking_code_len - 1]; - if (flags.write_output) try out_writer.writeByte(c); + if (flags.write_output) { + try out_writer.writeByte(c); + bytes_written += 1; + } if (flags.print_output) try std_out.writer().print("{c}", .{c}); decoded_letters_read += 1; window = window & ((@as(u32, 0b1) << - @truncate(u5, window_len - checking_code_len)) - 1); + @as(u5, @truncate(window_len - checking_code_len))) - 1); window_len -= checking_code_len; checking_code_len = shortest_code; } @@ -171,4 +179,5 @@ std_out: std.fs.File, flags: DecodeFlags) !void { } } } + return bytes_written; } diff --git a/src/encode.zig b/src/encode.zig index 6c04c3a..84a963c 100644 --- a/src/encode.zig +++ b/src/encode.zig @@ -20,7 +20,7 @@ const Node = struct { visited: bool, }; -pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Writer, std_out: std.fs.File, flags: EncodeFlags) !void { +pub fn encode(allocator: Allocator, text: []const u8, out_writer: anytype, std_out: std.fs.File, flags: EncodeFlags) !usize { const start_time = std.time.microTimestamp(); defer if (flags.debug) std_out.writer().print("\ntime taken: {d}μs\n", .{std.time.microTimestamp() - start_time}) catch {}; @@ -49,7 +49,7 @@ pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Wr // occurences is definitionally sorted in ASCII alphabetical order // so ties (1+ c's with same o) with be resolved alphabetically if (o == min_value) { - sorted_letter_book[book_index] = @intCast(u8, c); + sorted_letter_book[book_index] = @intCast(c); if (book_index < 255) book_index += 1; } } @@ -90,7 +90,7 @@ pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Wr lowest_nodes[i] = try leaf_queue.dequeue(); } else if (leaf_queue.count == 0) { lowest_nodes[i] = try sapling_queue.dequeue(); - } else if (leaf_queue.get_front().weight <= sapling_queue.get_front().weight) { + } else if (leaf_queue.peek().?.weight <= sapling_queue.peek().?.weight) { lowest_nodes[i] = try leaf_queue.dequeue(); } else { lowest_nodes[i] = try sapling_queue.dequeue(); @@ -105,7 +105,7 @@ pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Wr .right = lowest_nodes[1], .visited = false, }; - var internal_parent = &nodes[nodes_index].?; + const internal_parent = &nodes[nodes_index].?; nodes_index += 1; lowest_nodes[0].parent = internal_parent; @@ -178,7 +178,7 @@ pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Wr var j: u8 = traverser.path.length; while (j > 0) : (j -= 1) { if (flags.debug) try std_out.writer().print("{b}", .{traverser.path.data >> - @truncate(u4, j - 1) & 1}); + @as(u4, @truncate(j - 1)) & 1}); } if (flags.debug) try std_out.writer().print("\n", .{}); dictionary[traverser.node.symbol orelse unreachable] = traverser.path; @@ -190,7 +190,7 @@ pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Wr var out_buffer = try allocator.alloc(u8, max_header_length + text.len); defer allocator.free(out_buffer); var out_buffer_out = std.io.fixedBufferStream(out_buffer); - var bit_stream_writer = std.io.bitWriter(.Big, out_buffer_out.writer()); + var bit_stream_writer = std.io.bitWriter(.big, out_buffer_out.writer()); var bits_written: usize = 0; @@ -208,9 +208,8 @@ pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Wr bits_written += 8; // write body length - // WARN: really bad hack around big files not decoding - // all the way (not reliable at all) gotta find underlying issue - try bit_stream_writer.writeBits(@truncate(u32, text.len + text.len / 100), 32); + try bit_stream_writer.writeBits(text.len, 32); + std.debug.print("text.len {}", .{text.len}); bits_written += 32; // write dictionary @@ -224,27 +223,28 @@ pub fn encode(allocator: Allocator, text: []const u8, out_writer: std.fs.File.Wr bits_written += 8; var j: usize = code.length; while (j > 0) : (j -= 1) { - try bit_stream_writer.writeBits((code.data >> @truncate(u4, j - 1)) & 1, 1); + try bit_stream_writer.writeBits((code.data >> @as(u4, @truncate(j - 1))) & 1, 1); bits_written += 1; } } } try bit_stream_writer.flushBits(); - // bits_written = bits_written + (8 - bits_written % 10); + bits_written = if (bits_written % 8 != 0) (bits_written / 8 + 1) * 8 else bits_written; // write compressed bits for (text) |char| { - var code = dictionary[char]; + const code = dictionary[char]; var j: usize = code.length; while (j > 0) : (j -= 1) { - try bit_stream_writer.writeBits((code.data >> @truncate(u4, j - 1)) & 1, 1); + try bit_stream_writer.writeBits((code.data >> @as(u4, @truncate(j - 1))) & 1, 1); bits_written += 1; } } try bit_stream_writer.flushBits(); - // bits_written = bits_written + (8 - bits_written % 10); - if (flags.write_output) try out_writer.writeAll(out_buffer[0 .. bits_written / 8 + 3]); + bits_written = if (bits_written % 8 != 0) (bits_written / 8 + 1) * 8 else bits_written; + if (flags.write_output) try out_writer.writeAll(out_buffer[0 .. bits_written / 8]); if (flags.debug) try std_out.writer().print("\nbits in output: {d}\n", .{bits_written}); - //try out_writer.writeAll(out_buffer[0..]); + + return bits_written / 8; } diff --git a/src/main.zig b/src/main.zig index e5b52d5..943ac1b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -9,7 +9,7 @@ const fs = std.fs; const mem = std.mem; const Allocator = mem.Allocator; -const Mode = enum(u2) { +const Mode = enum { None, Compress, Decompress, @@ -40,10 +40,10 @@ fn read_text_file(allocator: Allocator, filepath: []const u8) ![]const u8 { } fn run_cli(allocator: Allocator, std_out: std.fs.File) !Options { - var options = Options{ .print = false, .debug = false, .dry = false, .mode = Mode.None, .file_in_path = undefined, .file_out_path = try allocator.alloc(u8, 0) }; + var options = Options{ .print = false, .debug = false, .dry = false, .mode = Mode.None, .file_in_path = undefined, .file_out_path = undefined }; const help_text = - \\File compression tool using huffman compression + \\Entreepy - Text compression tool \\ \\Usage: entreepy [options] [command] [file] [command options] \\ @@ -65,6 +65,7 @@ fn run_cli(allocator: Allocator, std_out: std.fs.File) !Options { \\ entreepy -ptd d text.txt.et -o decoded_text.txt \\ ; + var args = try std.process.argsWithAllocator(allocator); // skip exe path _ = args.skip(); @@ -75,93 +76,74 @@ fn run_cli(allocator: Allocator, std_out: std.fs.File) !Options { }; if (mem.eql(u8, arg1, "-h") or mem.eql(u8, arg1, "--help")) { - try std_out.writeAll(help_text); + std_out.writeAll(help_text) catch {}; return options; } - var arg = arg1; - while (true) { - // parse initial options - if (arg[0] == '-') { - for (arg[1..]) |c| { - switch (c) { - 'p' => options.print = true, - 'd' => options.debug = true, - 't' => options.dry = true, + const CLIParsingState = enum { reading_normal, reading_out_path, reading_in_path }; + + var cli_parsing_state: CLIParsingState = .reading_normal; + + while (args.next()) |arg| { + switch (cli_parsing_state) { + .reading_normal => { + switch (arg[0]) { '-' => { - if (mem.eql(u8, arg[2..], "print")) { - options.print = true; - break; - } else if (mem.eql(u8, arg[2..], "debug")) { - options.debug = true; - break; - } else if (mem.eql(u8, arg[2..], "test")) { - options.dry = true; - break; + // parse initial options + for (arg[1..]) |c| { + switch (c) { + 'p' => options.print = true, + 'd' => options.debug = true, + 't' => options.dry = true, + 'o' => cli_parsing_state = .reading_out_path, + '-' => { + if (mem.eql(u8, arg[2..], "print")) { + options.print = true; + break; + } else if (mem.eql(u8, arg[2..], "debug")) { + options.debug = true; + break; + } else if (mem.eql(u8, arg[2..], "test")) { + options.dry = true; + break; + } else if (mem.eql(u8, arg[2..], "output")) { + cli_parsing_state = .reading_out_path; + break; + } else { + log.err("invalid option: {s}\n", .{arg}); + return error.InvalidOption; + } + }, + else => { + log.err("invalid option: {s}\n", .{arg}); + return error.InvalidOption; + }, + } + } + }, + 'c', 'd' => { + // parse commands + if (arg[0] == 'c') { + options.mode = Mode.Compress; } else { - log.err("invalid option: {s}\n", .{arg}); - return error.InvalidOption; + options.mode = Mode.Decompress; } + cli_parsing_state = .reading_in_path; }, else => { - log.err("invalid option: {s}\n", .{arg}); - return error.InvalidOption; + log.err("invalid command: {s}\n", .{arg}); + return error.InvalidCommand; }, } - } - arg = args.next() orelse break; - continue; - } - - // parse commands - if (arg[0] == 'c' or arg[0] == 'd') { - if (arg[0] == 'c') { - options.mode = Mode.Compress; - } else if (arg[0] == 'd') { - options.mode = Mode.Decompress; - } - options.file_in_path = args.next() orelse return error.NoInputFile; - arg = args.next() orelse break; - if (arg[0] == '-') { - for (arg[1..]) |c| { - switch (c) { - 'o' => { - // allocate it unnecessarily so we can free it unconditionally - options.file_out_path = - try allocator.dupe(u8, args.next() orelse { - log.err("missing file_in_name after -o\n", .{}); - return error.InvalidCommandArgument; - }); - break; - }, - '-' => { - if (mem.eql(u8, arg[2..], "output")) { - options.file_out_path = try allocator.dupe(u8, args.next() orelse { - log.err("missing file_in_name after --output\n", .{}); - return error.InvalidCommandArgument; - }); - } else { - log.err("invalid option: {s}\n", .{arg}); - return error.InvalidOption; - } - }, - else => { - log.err("invalid option: {c}\n", .{c}); - return error.InvalidOption; - }, - } - } else { - log.err("invalid option: {s}\n", .{arg}); - return error.InvalidOption; - } - } else { - log.err("invalid command: {s}\n", .{arg}); - return error.InvalidCommand; - } - - arg = args.next() orelse { - break; - }; + }, + .reading_in_path => { + options.file_in_path = arg; + cli_parsing_state = .reading_normal; + }, + .reading_out_path => { + options.file_out_path = try allocator.dupe(u8, arg); + cli_parsing_state = .reading_normal; + }, } } @@ -170,13 +152,12 @@ fn run_cli(allocator: Allocator, std_out: std.fs.File) !Options { options.file_out_path = try mem.concat(allocator, u8, &[2][]const u8{ options.file_in_path, ".et" }); } else { - // NOTE: hard to read but it just adds "decoded_" to the front of the file name and - // removes the ".et" extension if its there - var file_in_dir = fs.path.dirname(options.file_in_path) orelse ""; + // removes the ".et" extension if it's there and adds "decoded_" to the front of the file name + const file_in_dir = fs.path.dirname(options.file_in_path) orelse ""; var new_file_name = fs.path.basename(options.file_in_path); if (mem.eql(u8, new_file_name[new_file_name.len - 3 ..], ".et")) new_file_name = new_file_name[0 .. new_file_name.len - 3]; - var decoded_file_name = try mem.concat(allocator, u8, &[2][]const u8{ "decoded_", new_file_name }); + const decoded_file_name = try mem.concat(allocator, u8, &[2][]const u8{ "decoded_", new_file_name }); defer allocator.free(decoded_file_name); options.file_out_path = try fs.path.join(allocator, &[_][]const u8{ file_in_dir, decoded_file_name }); @@ -197,7 +178,7 @@ pub fn main() !void { defer allocator.free(options.file_out_path); if (options.mode == Mode.None) return; - var text_in = try read_text_file(allocator, options.file_in_path); + const text_in = try read_text_file(allocator, options.file_in_path); defer allocator.free(text_in); // Reading seems to add an extra \n at end... not sure actually, @@ -215,9 +196,9 @@ pub fn main() !void { } if (options.mode == Mode.Compress) { - try encode(allocator, text_in, out_writer, std_out, .{ .write_output = !options.dry, .print_output = options.print, .debug = options.debug }); + _ = try encode(allocator, text_in, out_writer, std_out, .{ .write_output = !options.dry, .print_output = options.print, .debug = options.debug }); } else { - try decode(allocator, text_in, out_writer, std_out, .{ .write_output = !options.dry, .print_output = options.print, .debug = options.debug }); + _ = try decode(allocator, text_in, out_writer, std_out, .{ .write_output = !options.dry, .print_output = options.print, .debug = options.debug }); } if (!options.dry) out_file.close(); diff --git a/src/queue.zig b/src/queue.zig index 96d3474..9616715 100644 --- a/src/queue.zig +++ b/src/queue.zig @@ -1,13 +1,13 @@ const std = @import("std"); -// basic circular buffer queue NOTE: .front and .back ranges are questionable -pub fn Queue(comptime T: type, comptime length: usize) type { - const QueueError = error{ - OutOfBounds, - QueueOverflow, - QueueUnderflow, - }; +const QueueError = error{ + OutOfBounds, + QueueFull, + QueueEmpty, +}; +// basic circular buffer queue +pub fn Queue(comptime T: type, comptime length: usize) type { return struct { count: usize = 0, front: usize = 0, @@ -17,78 +17,97 @@ pub fn Queue(comptime T: type, comptime length: usize) type { const Self = @This(); pub fn enqueue(self: *Self, new_value: T) QueueError!void { - if ((self.back + 1) % (self.data.len + 1) == self.front) { - return QueueError.QueueOverflow; - } + if (self.count == self.data.len) return QueueError.QueueFull; - if (self.count == 0) { - self.front = 0; - self.back = 1; - } else { - self.back = (self.back) % self.data.len + 1; - } + self.back = (self.back % self.data.len) + 1; - if (self.back > self.data.len) return QueueError.OutOfBounds; self.data[self.back - 1] = new_value; self.count += 1; } pub fn dequeue(self: *Self) QueueError!T { if (self.count == 0) { - return QueueError.QueueUnderflow; + return QueueError.QueueEmpty; } - if (self.front == self.back - 1) { - const value = self.data[self.front] orelse QueueError.OutOfBounds; - self.front = 0; - self.back = 0; - self.count -= 1; - return value; - } else { - const value = self.data[self.front] orelse QueueError.OutOfBounds; - self.front = (self.front + 1) % self.data.len; - self.count -= 1; - return value; - } + const value = self.data[self.front] orelse QueueError.OutOfBounds; + self.front = (self.front + 1) % self.data.len; + self.count -= 1; + return value; } - pub fn get_front(self: Self) T { - return self.data[self.front].?; + pub fn peek(self: Self) ?T { + if (self.count == 0) return null; + return self.data[self.front]; } }; } -test "queue" { - var q = Queue(u8, 4){}; +test "queue enqueue and peek" { + var q = Queue(u8, 3){}; - // test filling partway then going back to empty - try q.enqueue(4); - try q.enqueue(8); - try std.testing.expectEqual(try q.dequeue(), 4); - try std.testing.expectEqual(try q.dequeue(), 8); + try q.enqueue(42); + try std.testing.expectEqual(@as(?u8, 42), q.peek()); + + try q.enqueue(24); + try std.testing.expectEqual(@as(?u8, 42), q.peek()); +} - // test filling completely - try q.enqueue(7); +test "queue single element" { + var q = Queue(u8, 3){}; + + try q.enqueue(1); + try std.testing.expectEqual(try q.dequeue(), 1); + try std.testing.expectError(QueueError.QueueEmpty, q.dequeue()); +} + +test "queue is full after enqueues" { + var q = Queue(u8, 3){}; + + try q.enqueue(1); try q.enqueue(2); try q.enqueue(3); - try q.enqueue(5); - try std.testing.expectEqual(try q.dequeue(), 7); - try std.testing.expectEqual(try q.dequeue(), 2); - try std.testing.expectEqual(try q.dequeue(), 3); - try std.testing.expectEqual(try q.dequeue(), 5); - // test wrapping + try std.testing.expectError(QueueError.QueueFull, q.enqueue(4)); +} + +test "queue empty after dequeues" { + var q = Queue(u8, 3){}; + try q.enqueue(1); try q.enqueue(2); - try std.testing.expectEqual(try q.dequeue(), 1); try q.enqueue(3); - try q.enqueue(4); - try q.enqueue(5); // wraps and goes in index 0 + _ = try q.dequeue(); + _ = try q.dequeue(); + _ = try q.dequeue(); + + try std.testing.expectError(QueueError.QueueEmpty, q.dequeue()); +} + +test "queue wrap around after full cycle" { + var q = Queue(u8, 3){}; + + try q.enqueue(1); + try q.enqueue(2); + try q.enqueue(3); + try std.testing.expectEqual(try q.dequeue(), 1); try std.testing.expectEqual(try q.dequeue(), 2); + try q.enqueue(4); + try q.enqueue(5); try std.testing.expectEqual(try q.dequeue(), 3); try std.testing.expectEqual(try q.dequeue(), 4); try std.testing.expectEqual(try q.dequeue(), 5); - try q.enqueue(42); - try std.testing.expectEqual(try q.dequeue(), 42); + try std.testing.expectError(QueueError.QueueEmpty, q.dequeue()); +} + +test "queue peek after wrap around" { + var q = Queue(u8, 3){}; + + try q.enqueue(1); + try q.enqueue(2); + try q.enqueue(3); + try std.testing.expectEqual(try q.dequeue(), 1); + try q.enqueue(4); + try std.testing.expectEqual(@as(?u8, 2), q.peek()); } diff --git a/src/test.zig b/src/test.zig new file mode 100644 index 0000000..84433ff --- /dev/null +++ b/src/test.zig @@ -0,0 +1,76 @@ +const std = @import("std"); +const testing = std.testing; + +const encode = @import("encode.zig").encode; +const decode = @import("decode.zig").decode; + +fn round_trip(text_in: []const u8) ![]const u8 { + const stderr = std.io.getStdErr(); + + const encoded_buffer: []u8 = try testing.allocator.alloc(u8, text_in.len * 2); + defer testing.allocator.free(encoded_buffer); + var encoded_stream = std.io.fixedBufferStream(encoded_buffer); + const encoded_writer = encoded_stream.writer(); + + const encoded_len = try encode(testing.allocator, text_in, encoded_writer, stderr, .{ .write_output = true, .print_output = false, .debug = false }); + + const msg = try std.fmt.allocPrint(testing.allocator, "bits encoded {}", .{encoded_len}); + try stderr.writeAll(msg); + testing.allocator.free(msg); + + const decoded_buffer: []u8 = try testing.allocator.alloc(u8, text_in.len * 2); + defer testing.allocator.free(decoded_buffer); + var decoded_stream = std.io.fixedBufferStream(decoded_buffer); + const decoded_writer = decoded_stream.writer(); + + const decoded_len = try decode(testing.allocator, encoded_buffer[0..encoded_len], decoded_writer, stderr, .{ .write_output = true, .print_output = false, .debug = false }); + + const msg2 = try std.fmt.allocPrint(testing.allocator, "\ndecoded buffer: {s}", .{decoded_buffer[0..decoded_len]}); + try stderr.writeAll(msg2); + testing.allocator.free(msg2); + + return try testing.allocator.dupe(u8, decoded_buffer[0..decoded_len]); +} + +test "round trip basic" { + var file = try std.fs.cwd().openFile("res/test.txt", .{}); + defer file.close(); + const text_in = try testing.allocator.alloc(u8, (try file.stat()).size); + try file.reader().readNoEof(text_in); + defer testing.allocator.free(text_in); + + const text_out = try round_trip(text_in); + defer testing.allocator.free(text_out); + + try testing.expectEqualStrings(text_in, text_out); +} + +test "round trip soliloquy" { + var file = try std.fs.cwd().openFile("res/nice.shakespeare.txt", .{}); + defer file.close(); + const text_in = try testing.allocator.alloc(u8, (try file.stat()).size); + try file.reader().readNoEof(text_in); + defer testing.allocator.free(text_in); + + const text_out = try round_trip(text_in); + defer testing.allocator.free(text_out); + + try testing.expectEqualStrings(text_in, text_out); +} + +test "round trip play" { + var file = try std.fs.cwd().openFile("res/a_midsummer_nights_dream.txt", .{}); + defer file.close(); + const text_in = try testing.allocator.alloc(u8, (try file.stat()).size); + try file.reader().readNoEof(text_in); + defer testing.allocator.free(text_in); + + const text_out = try round_trip(text_in); + defer testing.allocator.free(text_out); + + try testing.expectEqualStrings(text_in, text_out); +} + +test "queue" { + _ = @import("queue.zig"); +}