From b1822be18b218426d42f8bff36d0ef4898db8efb Mon Sep 17 00:00:00 2001 From: Daniel Rizk Date: Tue, 21 Jan 2025 12:25:54 -0500 Subject: [PATCH] will p = md --- src/TidierDB.jl | 6 +++-- src/db_parsing.jl | 36 ++++---------------------- src/docstrings.jl | 64 ++++++++++++++++++++++++++++++++++------------ test/comp_tests.jl | 5 ++-- 4 files changed, 60 insertions(+), 51 deletions(-) diff --git a/src/TidierDB.jl b/src/TidierDB.jl index c772cd2..ef0490f 100644 --- a/src/TidierDB.jl +++ b/src/TidierDB.jl @@ -231,8 +231,8 @@ end - - +# too many backends are tested here to reasonably include in code coverage +# COV_EXCL_START """ $docstring_db_table """ @@ -365,6 +365,8 @@ function db_table(db, table::Vector{String}, athena_params::Any=nothing) end end +# COV_EXCL_STOP + """ $docstring_copy_to """ diff --git a/src/db_parsing.jl b/src/db_parsing.jl index 6899e4a..899d734 100644 --- a/src/db_parsing.jl +++ b/src/db_parsing.jl @@ -130,32 +130,7 @@ function parse_tidy_db(exprs, metadata::DataFrame) end end - elseif isa(actual_expr, AbstractVector) - for item in actual_expr - col_name = string(item) - if current_sql_mode[] == snowflake() - col_name = uppercase(col_name) - end - if is_excluded - push!(excluded_columns, col_name) - else - push!(included_columns, col_name) - end - end - elseif isa(actual_expr, Tuple) && all(isa.(actual_expr, Vector{Symbol})) - for vec in actual_expr - for item in vec - col_name = string(item)[2:end] - if current_sql_mode[] == snowflake() - col_name = uppercase(col_name) - end - if is_excluded - push!(excluded_columns, col_name) - else - push!(included_columns, col_name) - end - end - end + else error("Unsupported expression type: $expr") end @@ -311,11 +286,8 @@ function parse_case_when(expr) end -#hacky, but only way i could figure out how to get -#the right syntax for starts_with, ends_with, contains -#this is different then the tidy_selection starts_with, ends_with, contains, -#as that relies on matching column names from the metadata 
dataframe. - +#this fxn is not being tested, bc its only in backends. - i might be able to get rid of it entirely as well +# COV_EXCL_START function parse_char_matching(expr) MacroTools.postwalk(expr) do x if isa(x, Expr) && x.head == :call @@ -371,6 +343,8 @@ function parse_char_matching(expr) return x # Return the expression unchanged if no specific handling applies end end +# COV_EXCL_STOP + function parse_across(expr, metadata) columns_expr, funcs_expr = expr.args[2], expr.args[3] diff --git a/src/docstrings.jl b/src/docstrings.jl index 4409feb..c940117 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -117,6 +117,21 @@ julia> @chain db_table(db, :df_mem) begin ─────┼───────────────── 1 │ aa 0.6 2 │ bb 0.5 + +julia> q = @chain db_table(db, :df_mem) @summarize(mean = mean(value)); + +julia> @eval @chain db_table(db, :df_mem) begin + @filter(value < \$q) + @collect + end +4×4 DataFrame + Row │ id groups value percent + │ String String Int64 Float64 +─────┼──────────────────────────────── + 1 │ AA bb 1 0.1 + 2 │ AB aa 2 0.2 + 3 │ AF aa 1 0.6 + 4 │ AG bb 2 0.7 ``` """ @@ -242,6 +257,25 @@ julia> @chain db_table(db, :df_mem) begin 8 │ AE bb 5 0.5 10 6 9 │ AC bb 3 0.3 9 1 10 │ AA bb 1 0.1 4 missing + +julia> @chain db_table(db, :df_mem) begin + @mutate(across([:value, :percent], agg(kurtosis))) + @collect + end +10×6 DataFrame + Row │ id groups value percent value_kurtosis percent_kurtosis + │ String String Int64 Float64 Float64 Float64 +─────┼────────────────────────────────────────────────────────────────── + 1 │ AA bb 1 0.1 -1.33393 -1.2 + 2 │ AB aa 2 0.2 -1.33393 -1.2 + 3 │ AC bb 3 0.3 -1.33393 -1.2 + 4 │ AD aa 4 0.4 -1.33393 -1.2 + 5 │ AE bb 5 0.5 -1.33393 -1.2 + 6 │ AF aa 1 0.6 -1.33393 -1.2 + 7 │ AG bb 2 0.7 -1.33393 -1.2 + 8 │ AH aa 3 0.8 -1.33393 -1.2 + 9 │ AI bb 4 0.9 -1.33393 -1.2 + 10 │ AJ aa 5 1.0 -1.33393 -1.2 ``` """ @@ -718,6 +752,19 @@ julia> @chain db_table(db, "df_mem") begin 8 │ AG bb 2 0.7 missing missing 9 │ AH aa 3 0.8 missing missing 
10 │ AJ aa 5 1.0 missing missing + +julia> @chain db_table(db, "df_mem") begin + @mutate(test = percent * 100) + @left_join("df_join", test <= score, id = id2) + @collect + end; + + +julia> @chain db_table(db, "df_mem") begin + @mutate(test = percent * 200) + @left_join("df_join", closest(test >= score)) # asof join + @collect + end; ``` """ @@ -1650,7 +1697,7 @@ julia> copy_to(db, df, "df1"); julia> @chain db_table(db, "df1") @create_view(viewer); -julia> drop_view(db, viewer); +julia> drop_view(db, "viewer"); ``` """ @@ -1686,21 +1733,6 @@ SQLQuery("", "table", "", "", "", "", "", "", false, false, 2×4 DataFrame ``` """ - -const docstring_drop_view = -""" - drop_view(db, name) - -Drop a view from a database. - -# Arguments -- `db`: The database to drop the view from. -- `name`: The name of the view to drop. - -# Examples -`drop_view(db, "viewer")` -""" - const docstring_warnings = """ warnings(show::Bool) diff --git a/test/comp_tests.jl b/test/comp_tests.jl index 1e6b6b9..5dcea56 100644 --- a/test/comp_tests.jl +++ b/test/comp_tests.jl @@ -11,14 +11,15 @@ TDB_5 = @chain DB.t(test_db) DB.@relocate([groups, value], ends_with("d"), after = percent) DB.@collect TDF_6 = @chain test_df @select(!value) @relocate(groups, ends_with("d"), after = percent) TDB_6 = @chain DB.t(test_db) DB.@select(!value) DB.@relocate(groups, ends_with("d"), after = percent) DB.@collect - + TDF_7 = @chain test_df @select([:id, :value], groups) + TDB_7 = @chain DB.t(test_db) DB.@select([:id, :value], groups) DB.@collect @test all(Array(TDF_1 .== TDB_1)) @test all(Array(TDF_2 .== TDB_2)) @test all(Array(TDF_3 .== TDB_3)) @test all(Array(TDF_4 .== TDB_4)) @test all(Array(TDF_5 .== TDB_5)) @test all(Array(TDF_6 .== TDB_6)) - + @test all(Array(TDF_7 .== TDB_7)) end @testset "Group By Summarize" begin TDF_1 = @chain test_df @group_by(groups) @summarize(value = sum(value), n = n())