From b3868a92544b20ec04dbacfd93cd18624ac5627d Mon Sep 17 00:00:00 2001 From: Daniel Rizk Date: Tue, 21 Jan 2025 09:49:15 -0500 Subject: [PATCH] increase code coverage --- NEWS.md | 3 ++ Project.toml | 2 +- README.md | 4 +-- src/TBD_macros.jl | 3 +- src/docstrings.jl | 70 +++++++++++++++++++++++++++++++++++++++++++-- src/view_compute.jl | 4 +++ test/comp_tests.jl | 7 +++-- 7 files changed, 84 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index de71742..e3f0dac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,7 @@ # TidierDB.jl updates +## v0.6.3 - 2025-01-20 +- Resolve issue when filtering immediately after joining + ## v0.6.2 - 2025-01-09 - adds `@intersect` and `@setdiff` (SQLs `INTERSECT` and `EXCEPT`) respectively, with optional `all` argument - adds support for `all` arg to `@union` (equivalent to `@union_all`) diff --git a/Project.toml b/Project.toml index 5e3c57c..6e94146 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TidierDB" uuid = "86993f9b-bbba-4084-97c5-ee15961ad48b" authors = ["Daniel Rizk and contributors"] -version = "0.6.2" +version = "0.6.3" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" diff --git a/README.md b/README.md index 35cb185..baf7b5c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/TidierOrg/TidierDB.jl/blob/main/LICENSE) [![Docs: Latest](https://img.shields.io/badge/Docs-Latest-blue.svg)](https://tidierorg.github.io/TidierDB.jl/latest) [![Downloads](https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FTidierDB&query=total_requests&suffix=%2Fmonth&label=Downloads)](http://juliapkgstats.com/pkg/TidierDB) -[![Coverage Status](https://coveralls.io/repos/github/TidierOrg/TidierDB.jl/badge.svg?branch=main)](https://coveralls.io/github/TidierOrg/TidierDB.jl?branch=main) +[![Coverage 
Status](https://coveralls.io/repos/github/TidierOrg/TidierDB.jl/badge.svg?branch=main&kill=1)](https://coveralls.io/github/TidierOrg/TidierDB.jl?branch=main&kill=1) @@ -39,7 +39,7 @@ TidierDB.jl currently supports the following top-level macros: | **Category** | **Supported Macros and Functions** | |----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | **Data Manipulation** | `@arrange`, `@group_by`, `@filter`, `@select`, `@mutate` (supports `across`), `@summarize`/`@summarise` (supports `across`), `@distinct`, `@relocate` | -| **Joining** | `@left_join`, `@right_join`, `@inner_join`, `@anti_join`, `@full_join`, `@semi_join`, `@union`, `@union_all`, `@intersect`, `@setdiff` | +| **Joining/Setting** | `@left_join`, `@right_join`, `@inner_join`, `@anti_join`, `@full_join`, `@semi_join`, `@union`, `@union_all`, `@intersect`, `@setdiff` | | **Slice and Order** | `@slice_min`, `@slice_max`, `@slice_sample`, `@order`, `@window_order`, `@window_frame` | | **Utility** | `@show_query`, `@collect`, `@head`, `@count`, `show_tables`, `@create_view` , `drop_view` | | **Helper Functions** | `across`, `desc`, `if_else`, `case_when`, `n`, `starts_with`, `ends_with`, `contains`, `as_float`, `as_integer`, `as_string`, `is_missing`, `missing_if`, `replace_missing` | diff --git a/src/TBD_macros.jl b/src/TBD_macros.jl index 4118d04..4792500 100644 --- a/src/TBD_macros.jl +++ b/src/TBD_macros.jl @@ -399,6 +399,7 @@ function final_collect(sqlquery::SQLQuery, ::Type{<:duckdb}) return DataFrame(result) end +# COV_EXCL_START function final_collect(sqlquery::SQLQuery, ::Type{<:databricks}) final_query = finalize_query(sqlquery) result = execute_databricks(sqlquery.db, final_query) @@ -410,7 +411,7 @@ function final_collect(sqlquery::SQLQuery, ::Type{<:snowflake}) result = execute_snowflake(sqlquery.db, final_query) return 
DataFrame(result) end -#using TidierDB + function stream_collect(sqlquery::SQLQuery) final_query = finalize_query(sqlquery) res = DBInterface.execute(sqlquery.db, final_query, DuckDB.StreamResult) diff --git a/src/docstrings.jl b/src/docstrings.jl index 274466d..4409feb 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -378,8 +378,17 @@ julia> copy_to(db, df, "df_mem"); julia> @chain db_table(db, :df_mem) begin @group_by(groups) @slice_min(value, n = 2) + @arrange(groups, percent) # arranged due to duckdb multi threading @collect - end; + end +4×5 DataFrame + Row │ id groups value percent rank_col + │ String String Int64 Float64 Int64 +─────┼────────────────────────────────────────── + 1 │ AB aa 2 0.2 2 + 2 │ AF aa 1 0.6 1 + 3 │ AA bb 1 0.1 1 + 4 │ AG bb 2 0.7 2 julia> @chain db_table(db, :df_mem) begin @slice_min(value) @@ -391,6 +400,17 @@ julia> @chain db_table(db, :df_mem) begin ─────┼────────────────────────────────────────── 1 │ AA bb 1 0.1 1 2 │ AF aa 1 0.6 1 + +julia> @chain db_table(db, :df_mem) begin + @filter(percent > .1) + @slice_min(percent) + @collect + end +1×5 DataFrame + Row │ id groups value percent rank_col + │ String String Int64 Float64 Int64 +─────┼────────────────────────────────────────── + 1 │ AB aa 2 0.2 1 ``` """ @@ -419,8 +439,17 @@ julia> copy_to(db, df, "df_mem"); julia> @chain db_table(db, :df_mem) begin @group_by(groups) @slice_max(value, n = 2) + @arrange(groups) @collect - end; + end +4×5 DataFrame + Row │ id groups value percent rank_col + │ String String Int64 Float64 Int64 +─────┼────────────────────────────────────────── + 1 │ AJ aa 5 1.0 1 + 2 │ AD aa 4 0.4 2 + 3 │ AE bb 5 0.5 1 + 4 │ AI bb 4 0.9 2 julia> @chain db_table(db, :df_mem) begin @slice_max(value) @@ -432,6 +461,17 @@ julia> @chain db_table(db, :df_mem) begin ─────┼────────────────────────────────────────── 1 │ AE bb 5 0.5 1 2 │ AJ aa 5 1.0 1 + +julia> @chain db_table(db, :df_mem) begin + @filter(percent < .9) + @slice_max(percent) + @collect + end +1×5 
DataFrame + Row │ id groups value percent rank_col + │ String String Int64 Float64 Int64 +─────┼────────────────────────────────────────── + 1 │ AH aa 3 0.8 1 ``` """ @@ -1567,7 +1607,7 @@ julia> @chain t(df1_table) @setdiff(df2_table, all = true) @collect const docstring_create_view = """ - @view(sql_query, name, replace = true) + @create_view(sql_query, name, replace = true) Create a view from a SQL query. Currently supports DuckDB, MySQL, GBQ, Postgres @@ -1590,6 +1630,30 @@ julia> db_table(db, "viewer"); ``` """ +const docstring_drop_view = +""" + drop_view(db, name) + +Drop a view. Currently supports DuckDB, MySQL, GBQ, Postgres + +# Arguments +- `db`: The database connection that holds the view. +- `name`: The name of the view to drop. + +# Examples +```jldoctest +julia> db = connect(duckdb()); + +julia> df = DataFrame(id = [1, 2, 3], value = [10, 20, 30]); + +julia> copy_to(db, df, "df1"); + +julia> @chain db_table(db, "df1") @create_view(viewer); + +julia> drop_view(db, "viewer"); +``` +""" + const docstring_compute = """ @compute(sql_query, name, replace = false) diff --git a/src/view_compute.jl b/src/view_compute.jl index 485b5a9..6abd72b 100644 --- a/src/view_compute.jl +++ b/src/view_compute.jl @@ -35,6 +35,10 @@ macro create_view(sqlquery, name, replace = true) end end + +""" +$docstring_drop_view +""" function drop_view(db, name) DBInterface.execute(db, "DROP VIEW $name") end diff --git a/test/comp_tests.jl b/test/comp_tests.jl index 7d5d84d..1e6b6b9 100644 --- a/test/comp_tests.jl +++ b/test/comp_tests.jl @@ -234,12 +234,15 @@ TBD_1 = @chain DB.t(test_db) DB.@count(groups) DB.@collect TDF_2 = @chain test_df @count(groups, id) @arrange(groups, id) TBD_2 = @chain DB.t(test_db) DB.@count(groups, id) DB.@arrange(groups, id) DB.@collect + TDF_3 = @chain test_df @mutate(sum = sum(value)) + TBD_3 = @chain DB.t(test_db) DB.@mutate(sum = sum(value)) DB.@collect @test all(isequal.(Array(TDF_1), Array(TBD_1))) @test all(isequal.(Array(TDF_2), Array(TBD_2))) 
+ @test all(isequal.(Array(TDF_3), Array(TBD_3))) end @testset "Date Parsing" begin - TDF_1 = @chain test_df @mutate(test = ymd_hms("2023-06-15 00:00:00")) - TDB_1 = @chain DB.t(test_db) DB.@mutate(test = ymd("2023-06-15")) DB.@collect + TDF_1 = @chain test_df @mutate(test = ymd_hms("2023-06-15 00:00:00"), test2 = mdy_hms("06-12-2023 00:00:00")) @mutate(y = year(test), m = month(test), d = day(test), h = hour(test), mi = minute(test), s = second(test), floor = floor_date(test, "month"), dif = difftime(test, test2, "hours")) + TDB_1 = @chain DB.t(test_db) DB.@mutate(test = ymd("2023-06-15"), test2 = mdy("06-12-2023")) DB.@mutate(y = year(test), m = month(test), d = day(test), h = hour(test), mi = minute(test), s = second(test), floor = floor_date(test, "month"), dif = difftime(test, test2, "hours")) DB.@collect # Filter by date TDF_2 = @chain test_df @mutate(test = ymd_hms("2023-06-15 00:00:00")) @filter(test < ymd("2023-04-14")) TDB_2 = @chain DB.t(test_db) DB.@mutate(test = ymd("2023-06-15")) DB.@filter(test < ymd("2023-04-14")) DB.@collect