make DataFrames a test-only dependency

ericphanson · Feb 24, 2024 · 0bda72b · 0bda72b
1 parent 4e8d07e
commit 0bda72b
Show file tree

Hide file tree

Showing 5 changed files with 43 additions and 58 deletions.
diff --git a/Project.toml b/Project.toml
@@ -5,7 +5,6 @@ version = "1.0.0-DEV"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4"
 
 [compat]
@@ -18,7 +17,8 @@ julia = "1.10"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "Test"]
+test = ["Aqua", "DataFrames", "Test"]
diff --git a/README.md b/README.md
@@ -42,22 +42,13 @@ julia> using ExplicitImports
 
 julia> print_explicit_imports(ExplicitImports)
 WARNING: both JuliaSyntax and Base export "parse"; uses of it in module ExplicitImports must be qualified
-Module ExplicitImports is relying on implicit imports for 13 names. These could be explicitly imported as follows:
+Module ExplicitImports is relying on implicit imports for 4 names. These could be explicitly imported as follows:
 
 ```julia
 using AbstractTrees: Leaves
 using AbstractTrees: TreeCursor
 using AbstractTrees: children
 using AbstractTrees: nodevalue
-using DataAPI: outerjoin
-using DataFrames: AsTable
-using DataFrames: DataFrame
-using DataFrames: combine
-using DataFrames: groupby
-using DataFrames: select!
-using DataFrames: subset
-using DataFrames: subset!
-using Tables: ByRow
 ```
 
 ````

diff --git a/src/ExplicitImports.jl b/src/ExplicitImports.jl
@@ -2,7 +2,6 @@ module ExplicitImports
 
 using JuliaSyntax, AbstractTrees
 using AbstractTrees: parent
-using DataFrames
 
 export explicit_imports, stale_explicit_imports, print_explicit_imports,
        explicit_imports_single, check_no_implicit_imports, check_no_stale_explicit_imports
@@ -85,11 +84,10 @@ function explicit_imports_single(mod, file=pathof(mod); skips=(Base, Core), warn
         throw(ArgumentError("This appears to be a module which is not defined in package. In this case, the file which defines the module must be passed explicitly as the second argument."))
     end
     all_implicit_imports = find_implicit_imports(mod; skips)
-    df = get_names_used(file)
-    df = restrict_to_module(df, mod)
+    needs_explicit_import, unnecessary_explicit_import = get_names_used(file)
+    restrict_to_module!(needs_explicit_import, mod)
 
-    needs = subset(df, :needs_explicit_import)
-    needed_names = Set(needs.name)
+    needed_names = Set(nt.name for nt in needs_explicit_import)
     filter!(all_implicit_imports) do (k, v)
         k in needed_names || return false
         should_skip(v; skips) && return false
@@ -116,7 +114,8 @@ function explicit_imports_single(mod, file=pathof(mod); skips=(Base, Core), warn
     sort!(to_make_explicit; lt)
 
     if warn
-        unnecessary = unique(sort(subset(df, :unnecessary_explicit_import).name))
+        restrict_to_module!(unnecessary_explicit_import, mod)
+        unnecessary = unique!(sort!([nt.name for nt in unnecessary_explicit_import]))
         if !isempty(unnecessary)
             @warn "Found stale explicit imports in $mod for these names: $unnecessary. To get this list programmatically, call `stale_explicit_imports`. To silence this warning, pass `warn=false`."
         end
@@ -134,10 +133,9 @@ function stale_explicit_imports(mod, file=pathof(mod))
     if isnothing(file)
         throw(ArgumentError("This appears to be a module which is not defined in package. In this case, the file which defines the module must be passed explicitly as the second argument."))
     end
-    df = get_names_used(file)
-    df = restrict_to_module(df, mod)
-    unnecessary = unique(sort(subset(df, :unnecessary_explicit_import).name))
-    return unnecessary
+    _, unnecessary_explicit_import = get_names_used(file)
+    restrict_to_module!(unnecessary_explicit_import, mod)
+    return unique!(sort!([nt.name for nt in unnecessary_explicit_import]))
 end
 
 function has_ancestor(query, target)
@@ -167,7 +165,7 @@ function module_path(mod)
     end
 end
 
-function restrict_to_module(df, mod)
+function restrict_to_module!(set, mod)
     # Limit to only the module of interest. We make some attempt to avoid name collisions
     # (where two nested modules have the same name) by matching on the full path - to some extent.
     # We can't really assume we were given the "top-level" file (I think), so we might not be
@@ -177,8 +175,10 @@ function restrict_to_module(df, mod)
     # This means we cannot distinguish X.Y.X from X in some cases.
     # Don't do that!
     mod_path = module_path(mod)
-    return subset(df,
-                  :module_path => ByRow(ms -> all(Base.splat(isequal), zip(ms, mod_path))))
+    filter!(set) do nt
+        return all(Base.splat(isequal), zip(nt.module_path, mod_path))
+    end
+    return set
 end
 
 # recurse through to find all submodules of `mod`

diff --git a/src/get_names_used.jl b/src/get_names_used.jl
@@ -170,7 +170,7 @@ function analyze_all_names(file; debug=false)
     # since that tells us about assignment
     seen = Set{@NamedTuple{name::Symbol,scope_path::Vector{JuliaSyntax.SyntaxNode}}}()
 
-    explicit_imports = @NamedTuple{name::Symbol,module_path::Vector{Symbol}}[]
+    explicit_imports = Set{@NamedTuple{name::Symbol,module_path::Vector{Symbol}}}()
 
     for leaf in Leaves(cursor)
         JuliaSyntax.kind(nodevalue(leaf).node) == K"Identifier" || continue
@@ -213,7 +213,6 @@ function analyze_all_names(file; debug=false)
         end
     end
 
-    unique!(explicit_imports)
     return per_scope_info, explicit_imports
 end
 
@@ -224,39 +223,27 @@ Figures out which global names are used in `file`, and what modules they are use
 
 Traverses static `include` statements.
 
-Returns a `DataFrame` with four columns:
-* `name`: the name in question
-* `module_path`: the path of modules to access the name, where the first module in the path is the innermost.
-* `needs_explicit_import::Bool`
-* `unnecessary_explicit_import::Bool`
+Returns two `Set{@NamedTuple{name::Symbol,module_path::Vector{Symbol}}}`, namely
+
+* `needs_explicit_import`
+* `unnecessary_explicit_import`
 """
 function get_names_used(file)
     # Here we get 1 row per name per scope
     per_scope_info, explicit_imports = analyze_all_names(file)
-    df = DataFrame(per_scope_info)
-    explicit_imports = DataFrame(explicit_imports)
-    explicit_imports.explicitly_imported .= true
-
-    # further processing...
-    # we want one row per name per module path, not per scope,
-    # so combine over scopes-within-a-module and decide if this name
-    # is being used to refer to a global binding within this module
-    ret = combine(groupby(df, [:name, :module_path]),
-                  [:global_scope, :assigned_first] => function (g, a)
-                      return any(g) || any(!, a)
-                  end => :may_want_to_explicitly_import)
-
-    ret = outerjoin(ret, explicit_imports; on=[:name, :module_path])
-    ret.explicitly_imported = coalesce.(ret.explicitly_imported, false)
-    ret.may_want_to_explicitly_import = coalesce.(ret.may_want_to_explicitly_import, false)
 
-    select!(ret, :name, :module_path,
-            [:may_want_to_explicitly_import, :explicitly_imported] => ByRow() do want, is
-                needs_explicit_import = want && !is
-                unnecessary_explicit_import = !want && is
-                return (; needs_explicit_import, unnecessary_explicit_import)
-            end => AsTable)
-
-    subset!(ret, [:needs_explicit_import, :unnecessary_explicit_import] => ByRow(|))
-    return ret
+    # if a name is used to refer to a global in any scope within a module,
+    # then we may want to explicitly import it.
+    # So we iterate through our scopes and see.
+    names_used_for_global_bindings = Set{@NamedTuple{name::Symbol,
+                                                     module_path::Vector{Symbol}}}()
+    for nt in per_scope_info
+        if nt.global_scope || !nt.assigned_first
+            push!(names_used_for_global_bindings, (; nt.name, nt.module_path))
+        end
+    end
+    # name used to point to a global which was not explicitly imported
+    needs_explicit_import = setdiff(names_used_for_global_bindings, explicit_imports)
+    unnecessary_explicit_import = setdiff(explicit_imports, names_used_for_global_bindings)
+    return (; needs_explicit_import, unnecessary_explicit_import)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,10 +1,17 @@
 using ExplicitImports
-using ExplicitImports: analyze_all_names, has_ancestor, should_skip, restrict_to_module,
+using ExplicitImports: analyze_all_names, has_ancestor, should_skip,
                        module_path, explicit_imports_single, using_statement
 using Test
 using DataFrames
 using Aqua
 
+# DataFrames version of `restrict_to_module!`
+function restrict_to_module(df, mod)
+    mod_path = module_path(mod)
+    return subset(df,
+                  :module_path => ByRow(ms -> all(Base.splat(isequal), zip(ms, mod_path))))
+end
+
 include("Exporter.jl")
 include("TestModA.jl")
 include("test_mods.jl")