diff --git a/CHANGES.md b/CHANGES.md index 1257adc..588e1f9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## New in main * Add support for tasks depending on directories +* New `fast_dirs` mode in TaskManager for cheaper directory checks * Add progress callback * Add support for passing blocks to Tasks: diff --git a/TODO.md b/TODO.md index c4434f3..91ab8a9 100644 --- a/TODO.md +++ b/TODO.md @@ -6,9 +6,9 @@ [Fiber Metrics](https://github.com/didactic-drunk/fiber_metrics.cr) * Add wildcard dependencies (depend on all files / tasks matching a pattern) * Mark tasks as stale if the OUTPUT is modified since last run -* Allow a "shallow" mode for directory dependencies, which hashes just a list of contents - and not the contents of the files themselves. +* ~~Allow a "shallow" mode for directory dependencies, which hashes just a list of contents~~ + ~~and not the contents of the files themselves.~~ * ~~Add directory dependencies (depend on all files in the tree)~~ * ~~Fix parallel `run_all` flag~~ * ~~Add a faster stale input check using file dates instead of hashes (like make)~~ diff --git a/spec/croupier_spec.cr b/spec/croupier_spec.cr index 66bfd04..65df3d3 100644 --- a/spec/croupier_spec.cr +++ b/spec/croupier_spec.cr @@ -785,6 +785,33 @@ describe "TaskManager" do id: "t1" ) TaskManager.scan_inputs.should eq({"dir" => "da39a3ee5e6b4b0d3255bfef95601890afd80709"}) + File.write("dir/input", "foo") + TaskManager.scan_inputs.should eq({"dir" => "7e0dbd57e84798fe1a40bb453dcf51f2569a3a2e"}) + # This mode doesn't ignore file contents + File.write("dir/input", "bar") + TaskManager.scan_inputs.should eq({"dir" => "63734eec74b627be2865c006e11b270747b4df2c"}) + Dir.mkdir("dir/dir1") + TaskManager.scan_inputs.should eq({"dir" => "05cfe846d65930c8a0be1da2b8aa16ee21d0cbdd"}) + end + end + it "should hash directories in fast_dirs mode" do + with_scenario("empty") do + TaskManager.fast_dirs = true + Dir.mkdir("dir") + Task.new( + inputs: ["dir"], + always_run: true, + proc: 
nil, + id: "t1" + ) + TaskManager.scan_inputs.should eq({"dir" => "da39a3ee5e6b4b0d3255bfef95601890afd80709"}) + File.write("dir/input", "foo") + TaskManager.scan_inputs.should eq({"dir" => "18e96066fa04ae6c67b5cdcfb02c7c5646ae2402"}) + # This mode ignores file contents + File.write("dir/input", "bar") + TaskManager.scan_inputs.should eq({"dir" => "18e96066fa04ae6c67b5cdcfb02c7c5646ae2402"}) + Dir.mkdir("dir/dir1") + TaskManager.scan_inputs.should eq({"dir" => "f6fa5320de20f424aaab984f56d470386ca9cb96"}) end end end diff --git a/src/croupier.cr b/src/croupier.cr index adfc644..92c71c7 100644 --- a/src/croupier.cr +++ b/src/croupier.cr @@ -108,7 +108,7 @@ module Croupier # Refuse to merge if this task or any of the colliding ones # are not mergeable raise "Can't merge task #{self} with #{to_merge[..-2].map(&.to_s)}" \ - if to_merge.size > 1 && to_merge.any? { |t| !t.mergeable? } + if to_merge.size > 1 && to_merge.any? { |t| !t.mergeable? } reduced = to_merge.reduce { |t1, t2| t1.merge t2 } reduced.keys.each { |k| TaskManager.tasks[k] = reduced } end @@ -302,6 +302,8 @@ module Croupier property? fast_mode : Bool = false # If true, it's running in auto mode property? auto_mode : Bool = false + # If true, directories depend on the list of files they contain, not on the files' contents + property? fast_dirs : Bool = false # If set, it's called after every task finishes property progress_callback : Proc(String, Nil) = ->(_id : String) {} @@ -502,9 +504,13 @@ module Croupier else if File.directory? path digest = Digest::SHA1.digest do |ctx| - # Hash *everything* in the directory (this will be slow) - Dir.glob("#{path}/**/*").each do |f| - ctx.update File.read(path) if File.file? f + # Hash the directory tree + ctx.update(Dir.glob("#{path}/**/*").join("\n")) + if !@fast_dirs + # Hash *everything* in the directory (this will be slow) + Dir.glob("#{path}/**/*").each do |f| + ctx.update File.read(f) if File.file? 
f + end end end hash[path] = digest.hexstring @@ -530,7 +536,7 @@ module Croupier !File.exists?(input) } raise "Can't run: Unknown inputs #{bad_inputs.join(", ")}" \ - unless bad_inputs.empty? + unless bad_inputs.empty? end # Run all stale tasks in dependency order