Skip to content

Commit

Permalink
Merge pull request #69 from TidierOrg/add_comparison_tests
Browse files Browse the repository at this point in the history
add 50 tests comparing tidierdata w tidierdb
  • Loading branch information
drizk1 authored Oct 2, 2024
2 parents b478b38 + 19e6884 commit 02fc484
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 13 deletions.
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# TidierDB.jl updates
## v0.4.0 - 2024-10-
## v0.4.1 - 2024-10-02
- Adds 50 tests comparing TidierDB to TidierData to ensure accuracy across complex chains of operations, including combinations of `@mutate`, `@summarize`, `@filter`, `@select`, `@group_by` and `@join` operations.

## v0.4.0 - 2024-10-01
- adds `@create_view`
- adds `drop_view`
- adds support for joining a queried table with another queried table
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierDB"
uuid = "86993f9b-bbba-4084-97c5-ee15961ad48b"
authors = ["Daniel Rizk <rizk.daniel.12@gmail.com> and contributors"]
version = "0.4.0"
version = "0.4.1"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
8 changes: 4 additions & 4 deletions src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,13 @@ function finalize_query(sqlquery::SQLQuery)
if !isempty(sqlquery.ch_settings) && current_sql_mode[] == clickhouse()
complete_query = complete_query * " \n " * string(sqlquery.ch_settings)
end

complete_query = replace(complete_query, "&&" => " AND ", "||" => " OR ",
"FROM )" => ")" , "SELECT SELECT " => "SELECT ", "SELECT SELECT " => "SELECT ", "DISTINCT SELECT " => "DISTINCT ",
"SELECT SELECT SELECT " => "SELECT ", "PARTITION BY GROUP BY" => "PARTITION BY", "GROUP BY GROUP BY" => "GROUP BY", "HAVING HAVING" => "HAVING",
r"var\"(.*?)\"" => s"\1", r"\"\\\$" => "\"\$")

complete_query = replace(complete_query, ", AS " => " AS ")
r"var\"(.*?)\"" => s"\1", r"\"\\\$" => "\"\$", "WHERE \"" => "WHERE ", "WHERE \"NOT" => "WHERE NOT", "%')\"" =>"%\")", "NULL)\"" => "NULL)",
"NULL))\"" => "NULL))"
)
complete_query = replace(complete_query, ", AS " => " AS ", "OR \"" => "OR ")
if current_sql_mode[] == postgres() || current_sql_mode[] == duckdb() || current_sql_mode[] == mysql() || current_sql_mode[] == mssql() || current_sql_mode[] == clickhouse() || current_sql_mode[] == athena() || current_sql_mode[] == gbq() || current_sql_mode[] == oracle() || current_sql_mode[] == snowflake() || current_sql_mode[] == databricks()
complete_query = replace(complete_query, "\"" => "'", "==" => "=")
end
Expand Down
32 changes: 25 additions & 7 deletions src/db_parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,15 @@ end
function parse_if_else(expr)
transformed_expr = MacroTools.postwalk(expr) do x
# Ensure we're dealing with an Expr object and it's a call to if_else
if isa(x, Expr) && x.head == :call && x.args[1] == :if_else
if isa(x, Expr) && x.head == :call && x.args[1] == :if_else && length(x.args) == 4
# Extract condition, true_case, and false_case from the arguments
condition = x.args[2]
true_case = x.args[3]
false_case = x.args[4]

# Check and format true_case and false_case appropriately
true_case_formatted = isa(true_case, String) ? "'$true_case'" : true_case
false_case_formatted = isa(false_case, String) ? "'$false_case'" : false_case
# Check and handle `missing` cases and formatting for string literals
true_case_formatted = (string(true_case) == "missing") ? "NULL" : (isa(true_case, String) ? "'$true_case'" : true_case)
false_case_formatted = (string(false_case) == "missing") ? "NULL" : (isa(false_case, String) ? "'$false_case'" : false_case)

# Construct the SQL CASE statement as a string
sql_case = "CASE WHEN $(condition) THEN $(true_case_formatted) ELSE $(false_case_formatted) END"
Expand All @@ -144,6 +144,7 @@ function parse_if_else(expr)
return transformed_expr
end


function parse_case_when(expr)
MacroTools.postwalk(expr) do x
# Ensure we're dealing with an Expr object
Expand All @@ -159,14 +160,30 @@ function parse_case_when(expr)
cond = x.args[i]
result = x.args[i + 1]

# Check and format result appropriately
result_formatted = isa(result, String) ? "'$result'" : result
# Handle `missing` by converting it to `NULL`
result_formatted = if result === :missing
"NULL"
elseif isa(result, String)
"'$result'"
else
result
end

# Append the WHEN-THEN part to the SQL CASE expression
push!(sql_case_parts, "WHEN $(cond) THEN $(result_formatted)")
end

# Handle the default case, the last argument
default_result = x.args[end]
default_result_formatted = isa(default_result, String) ? "'$default_result'" : default_result
default_result_formatted = if default_result === :missing
"NULL"
elseif isa(default_result, String)
"'$default_result'"
else
default_result
end

# Append the ELSE part and the END
push!(sql_case_parts, "ELSE $(default_result_formatted) END")

# Combine into a complete SQL CASE statement
Expand All @@ -181,6 +198,7 @@ function parse_case_when(expr)
end
end


#hacky, but only way i could figure out how to get
#the right syntax for starts_with, ends_with, contains
#this is different than the tidy_selection starts_with, ends_with, contains,
Expand Down
6 changes: 6 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[deps]
TidierData = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
TidierDB = "86993f9b-bbba-4084-97c5-ee15961ad48b"
TidierStrings = "248e6834-d0f8-40ef-8fbb-8e711d883e9c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Loading

2 comments on commit 02fc484

@drizk1
Copy link
Member Author

@drizk1 drizk1 commented on 02fc484 Oct 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:

  • Adds 50 tests comparing TidierDB to TidierData to ensure accuracy across complex chains of operations, including combinations of @mutate, @summarize, @filter, @select, @group_by and @join operations.
  • Fixes some edge-case bugs, uncovered while writing these tests, with if_else/case_when assigning a missing value

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/116490

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.1 -m "<description of version>" 02fc484ce27a5166f1177fa146d0c2b7071c10d4
git push origin v0.4.1

Please sign in to comment.