From 598bea7053cda6e4ca00f14dad8a18785fd7f766 Mon Sep 17 00:00:00 2001 From: drizk1 Date: Mon, 17 Jun 2024 13:59:40 -0400 Subject: [PATCH] add oracle support via odbc --- NEWS.md | 5 +- src/TidierDB.jl | 33 +++++--- src/parsing_oracle.jl | 174 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 src/parsing_oracle.jl diff --git a/NEWS.md b/NEWS.md index 107749a..b7e999e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # TidierDB.jl updates +## v0.1.7 - 2024-06-17 +- Adds support for Oracle backend via ODBC.jl connection + ## v0.1.6 - 2024-06-11 - Adds `@interpolate` and documentation around building macros with TidierDB chains and interpolation @@ -19,7 +22,7 @@ - Interpolation bug fix to allow interpolating vector of strings with the syntax: `@filter(column_name in [!!vector]) ` ## v0.1.2 - 2024-05-07 -- Adds AWS Athena backend support +- Adds AWS Athena backend support via AWS.jl ## v0.1.1 - 2024-04-12 - Fixes metadata retrieval for MariaDB diff --git a/src/TidierDB.jl b/src/TidierDB.jl index d4bdc59..a4bf278 100644 --- a/src/TidierDB.jl +++ b/src/TidierDB.jl @@ -39,6 +39,7 @@ include("parsing_mssql.jl") include("parsing_clickhouse.jl") include("parsing_athena.jl") include("parsing_gbq.jl") +include("parsing_oracle.jl") include("joins_sq.jl") include("slices_sq.jl") @@ -68,6 +69,8 @@ function expr_to_sql(expr, sq; from_summarize::Bool = false) return expr_to_sql_trino(expr, sq; from_summarize=from_summarize) elseif current_sql_mode[] == :gbq return expr_to_sql_gbq(expr, sq; from_summarize=from_summarize) + elseif current_sql_mode[] == :oracle + return expr_to_sql_oracle(expr, sq; from_summarize=from_summarize) else error("Unsupported SQL mode: $(current_sql_mode[])") end @@ -131,7 +134,7 @@ function finalize_query(sqlquery::SQLQuery) "FROM )" => ")" , "SELECT SELECT " => "SELECT ", "SELECT SELECT " => "SELECT ", "DISTINCT SELECT " => "DISTINCT ", "SELECT SELECT SELECT " => "SELECT ", "PARTITION BY GROUP BY" => "PARTITION BY", "GROUP BY GROUP BY" => "GROUP BY", "HAVING HAVING" => "HAVING", ) - if current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :athena || current_sql_mode[] == :gbq + if current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :athena || current_sql_mode[] == :gbq || current_sql_mode[] == :oracle complete_query = replace(complete_query, "\"" => "'", "==" => "=") end @@ -200,20 +203,28 @@ end # MSSQL function get_table_metadata(conn::ODBC.Connection, table_name::String) - # Query to get column names and types from INFORMATION_SCHEMA - query = """ - SELECT column_name, data_type - FROM information_schema.columns - WHERE table_name = '$table_name' - ORDER BY ordinal_position; - """ + if current_sql_mode[] == :oracle + table_name = uppercase(table_name) + query = """ + SELECT column_name, data_type + FROM all_tab_columns + WHERE table_name = '$table_name' + ORDER BY column_id + """ + else + query = """ + SELECT column_name, data_type + FROM information_schema.columns + WHERE table_name = '$table_name' + ORDER BY ordinal_position; + """ + end result = DBInterface.execute(conn, query) |> DataFrame - #result[!, :DATA_TYPE] = map(x -> String(x), result.DATA_TYPE) result[!, :current_selxn] .= 1 result[!, :table_name] .= table_name # Adjust the select statement to include the new table_name column - return select(result, :column_name => :name, :data_type => :type, :current_selxn, :table_name,) + return select(result, :column_name => :name, :data_type => :type, :current_selxn, :table_name) end # ClickHouse @@ -238,7 +249,7 @@ function db_table(db, table, athena_params::Any=nothing) table_name = string(table) metadata = if current_sql_mode[] == :lite get_table_metadata(db, table_name) - elseif current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :gbq + elseif current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :gbq || current_sql_mode[] == :oracle get_table_metadata(db, table_name) elseif current_sql_mode[] == :athena get_table_metadata_athena(db, table_name, athena_params) diff --git a/src/parsing_oracle.jl b/src/parsing_oracle.jl new file mode 100644 index 0000000..75d898c --- /dev/null +++ b/src/parsing_oracle.jl @@ -0,0 +1,174 @@ +function expr_to_sql_oracle(expr, sq; from_summarize::Bool) + expr = parse_char_matching(expr) + expr = exc_capture_bug(expr, names_to_modify) + MacroTools.postwalk(expr) do x + # Handle basic arithmetic and functions + if @capture(x, a_ + b_) + return :($a + $b) + elseif @capture(x, a_ - b_) + return :($a - $b) + elseif @capture(x, a_ * b_) + return :($a * $b) + elseif @capture(x, a_ / b_) + return :($a / $b) + elseif @capture(x, a_ ^ b_) + return :(POWER($a, $b)) + elseif @capture(x, round(a_)) + return :(ROUND($a)) + elseif @capture(x, round(a_, b_)) + return :(ROUND($a, $b)) + elseif @capture(x, mean(a_)) + if from_summarize + return :(AVG($a)) + else + window_clause = construct_window_clause(sq) + return "AVG($(string(a))) $(window_clause)" + end + elseif @capture(x, minimum(a_)) + if from_summarize + return :(MIN($a)) + else + window_clause = construct_window_clause(sq) + return "MIN($(string(a))) $(window_clause)" + end + elseif @capture(x, maximum(a_)) + if from_summarize + return :(MAX($a)) + else + window_clause = construct_window_clause(sq) + return "MAX($(string(a))) $(window_clause)" + end + elseif @capture(x, sum(a_)) + if from_summarize + return :(SUM($a)) + else + window_clause = construct_window_clause(sq) + return "SUM($(string(a))) $(window_clause)" + end + elseif @capture(x, cumsum(a_)) + if from_summarize + error("cumsum is only available through a windowed @mutate") + else + # sq.windowFrame = "ROWS UNBOUNDED PRECEDING " + window_clause = construct_window_clause(sq, from_cumsum = true) + return "SUM($(string(a))) $(window_clause)" + end + #stats agg + elseif @capture(x, std(a_)) + if from_summarize + return :(STDDEV_SAMP($a)) + else + window_clause = construct_window_clause(sq, ) + return "STDDEV_SAMP($(string(a))) $(window_clause)" + end + elseif @capture(x, cor(a_, b_)) + if from_summarize + return :(CORR($a)) + else + window_clause = construct_window_clause(sq) + return "CORR($(string(a))) $(window_clause)" + end + elseif @capture(x, cov(a_, b_)) + if from_summarize + return :(COVAR_SAMP($a)) + else + window_clause = construct_window_clause(sq) + return "COVAR_SAMP($(string(a))) $(window_clause)" + end + elseif @capture(x, var(a_)) + if from_summarize + return :(VAR_SAMP($a)) + else + window_clause = construct_window_clause(sq) + return "VAR_SAMP($(string(a))) $(window_clause)" + end + #elseif @capture(x, sql_agg(str_)) + # if from_summarize + # return error("sql_agg is only needed with aggregate functions in @mutate") + # else + # window_clause = construct_window_clause(sq) + # return "$(str) $(window_clause)" + # end + #stringr functions, have to use function that removes _ so capture can capture name + elseif @capture(x, strreplaceall(str_, pattern_, replace_)) + return :(REPLACE($str, $pattern, $replace)) + elseif @capture(x, strreplace(str_, pattern_, replace_)) + return error("str_replace is not yet supported for oracle support. Only str_replace_all") + elseif @capture(x, strremoveall(str_, pattern_)) + return :(REPLACE($str, $pattern, "")) + elseif @capture(x, strremove(str_, pattern_)) + return error("str_remove is not yet supported for oracle support. Only str_remove_all ") + elseif @capture(x, ismissing(a_)) + return "($(string(a)) IS NULL)" + # Date extraction functions + elseif @capture(x, year(a_)) + return "DATE(YEAR FROM " * string(a) * ")" + elseif @capture(x, month(a_)) + return "DATE(MONTH FROM " * string(a) * ")" + elseif @capture(x, day(a_)) + return "DATE(DAY FROM " * string(a) * ")" + elseif @capture(x, hour(a_)) + return "DATE(HOUR FROM " * string(a) * ")" + elseif @capture(x, minute(a_)) + return "DATE(MINUTE FROM " * string(a) * ")" + elseif @capture(x, second(a_)) + return "DATE(SECOND FROM " * string(a) * ")" + elseif @capture(x, floordate(time_column_, unit_)) + return floordate_to_oracle(unit, time_column) + elseif @capture(x, difftime(endtime_, starttime_, unit_)) + return :(DATE_DIFF($unit, $starttime, $endtime)) + elseif @capture(x, replacemissing(column_, replacement_value_)) + return :(COALESCE($column, $replacement_value)) + elseif @capture(x, missingif(column_, value_to_replace_)) + return :(NULLIF($column, $value_to_replace)) + elseif isa(x, Expr) && x.head == :call + if x.args[1] == :if_else && length(x.args) == 4 + return parse_if_else(x) + elseif x.args[1] == :as_float && length(x.args) == 2 + column = x.args[2] + return "CAST(" * string(column) * " AS DECIMAL)" + elseif x.args[1] == :as_integer && length(x.args) == 2 + column = x.args[2] + return "CAST(" * string(column) * " AS INTEGER)" + elseif x.args[1] == :as_string && length(x.args) == 2 + column = x.args[2] + return "CAST(" * string(column) * " AS STRING)" + elseif x.args[1] == :case_when + return parse_case_when(x) + elseif isa(x, Expr) && x.head == :call && x.args[1] == :! && x.args[1] != :!= && length(x.args) == 2 + inner_expr = expr_to_sql_oracle(x.args[2], sq, from_summarize = false) # Recursively transform the inner expression + return string("NOT (", inner_expr, ")") + elseif x.args[1] == :str_detect && length(x.args) == 3 + column, pattern = x.args[2], x.args[3] + return string(column, " LIKE \'%", pattern, "%'") + elseif isa(x, Expr) && x.head == :call && x.args[1] == :n && length(x.args) == 1 + return "COUNT(*)" + end + end + return x + end +end + + +function floordate_to_oracle(unit::String, time_column::Symbol) + sql_command = "" + + if unit == "second" + # Flooring to the nearest second requires a different anchor, like '2000-01-01' + sql_command = "DATEADD(SECOND, DATEDIFF(SECOND, '2000-01-01', $(string(time_column))), '2000-01-01')" + elseif unit == "minute" + sql_command = "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, $(string(time_column))), 0)" + elseif unit == "hour" + sql_command = "DATEADD(HOUR, DATEDIFF(HOUR, 0, $(string(time_column))), 0)" + elseif unit == "day" + sql_command = "DATEADD(DAY, DATEDIFF(DAY, 0, $(string(time_column))), 0)" + elseif unit == "month" + sql_command = "DATEADD(MONTH, DATEDIFF(MONTH, 0, $(string(time_column))), 0)" + elseif unit == "year" + sql_command = "DATEADD(YEAR, DATEDIFF(YEAR, 0, $(string(time_column))), 0)" + else + throw(ArgumentError("Unsupported unit: $unit")) + end + + return sql_command +end \ No newline at end of file