Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allows for table.name (no tidyselxn for table.name style yet) #8

Merged
merged 2 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/examples/UserGuide/key_differences.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ copy_to(db, df, "df_mem"); # copying over the data frame to an in-memory databas
@collect
end

# Notice the difference between the summarized tables above and below.

@chain db_table(db, :df_mem) begin
@group_by(groups)
@mutate(max = maximum(percent), min = minimum(percent))
Expand Down
30 changes: 23 additions & 7 deletions src/TBD_macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,24 @@ macro select(sqlquery, exprs...)
columns_str = join(["SELECT ", join([string(column) for column in columns], ", ")])
$(esc(sqlquery)).select = columns_str
$(esc(sqlquery)).metadata.current_selxn .= 0
selected_indices = indexin(columns, $(esc(sqlquery)).metadata.name)
$(esc(sqlquery)).metadata.current_selxn[selected_indices[.!isnothing.(selected_indices)]] .= 1
for col in columns
if occursin(".", col)
table_col_split = split(col, ".")
table_name, col_name = table_col_split[1], table_col_split[2]

# Iterate and update current_selxn based on matches
for idx in eachindex($(esc(sqlquery)).metadata.current_selxn)
if $(esc(sqlquery)).metadata.table_name[idx] == table_name &&
$(esc(sqlquery)).metadata.name[idx] == col_name
$(esc(sqlquery)).metadata.current_selxn[idx] = 1
end
end
else
# Direct matching for columns without 'table.' prefix
matching_indices = findall($(esc(sqlquery)).metadata.name .== col)
$(esc(sqlquery)).metadata.current_selxn[matching_indices] .= 1
end
end
end

$(esc(sqlquery))
Expand Down Expand Up @@ -138,7 +154,7 @@ end



function process_mutate_expression(expr, sq, select_expressions)
function process_mutate_expression(expr, sq, select_expressions, cte_name)
if isa(expr, Expr) && expr.head == :(=) && isa(expr.args[1], Symbol)
col_name = string(expr.args[1])
col_expr = expr_to_sql(expr.args[2], sq) # Convert to SQL expression
Expand All @@ -152,7 +168,7 @@ function process_mutate_expression(expr, sq, select_expressions)
# Append the mutation as a new column expression
push!(select_expressions, string(col_expr, " AS ", col_name))
# Update metadata to include this new column
push!(sq.metadata, Dict("name" => col_name, "type" => "UNKNOWN", "current_selxn" => 1))
push!(sq.metadata, Dict("name" => col_name, "type" => "UNKNOWN", "current_selxn" => 1, "table_name" => cte_name))
end
else
throw("Unsupported expression format in @mutate: $(expr)")
Expand Down Expand Up @@ -220,10 +236,10 @@ macro mutate(sqlquery, mutations...)
end
if isa(expr, Expr) && expr.head == :tuple
for subexpr in expr.args
process_mutate_expression(subexpr, sq, select_expressions)
process_mutate_expression(subexpr, sq, select_expressions, cte_name)
end
else
process_mutate_expression(expr, sq, select_expressions)
process_mutate_expression(expr, sq, select_expressions, cte_name)
end
end
cte_sql = " " * join(select_expressions, ", ") * " FROM " * sq.from
Expand Down Expand Up @@ -350,7 +366,7 @@ function process_summary_expression(expr, sq, summary_str)
summary_operation = string(summary_operation)
summary_column = expr_to_sql(expr.args[1], sq, from_summarize = true)
summary_column = string(summary_column)
push!(sq.metadata, Dict("name" => summary_column, "type" => "UNKNOWN", "current_selxn" => 1))
push!(sq.metadata, Dict("name" => summary_column, "type" => "UNKNOWN", "current_selxn" => 1, "table_name" => sq.from))

push!(summary_str, summary_operation * " AS " * summary_column)
else
Expand Down
45 changes: 28 additions & 17 deletions src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,6 @@ end




function get_table_metadata(db::SQLite.DB, table_name::String)
query = "PRAGMA table_info($table_name);"
result = SQLite.DBInterface.execute(db, query) |> DataFrame
result[!, :current_selxn] .= 1
resize!(result.current_selxn, nrow(result))
return select(result, 2 => :name, 3 => :type, :current_selxn)
end


function finalize_ctes(ctes::Vector{CTE})
if isempty(ctes)
return ""
Expand Down Expand Up @@ -139,6 +129,16 @@ function finalize_query(sqlquery::SQLQuery)
end


function get_table_metadata(db::SQLite.DB, table_name::String)
query = "PRAGMA table_info($table_name);"
result = SQLite.DBInterface.execute(db, query) |> DataFrame
result[!, :current_selxn] .= 1
resize!(result.current_selxn, nrow(result))
result[!, :table_name] .= table_name
# Adjust the select statement to include the new table_name column
return DataFrames.select(result, 2 => :name, 3 => :type, :current_selxn, :table_name)
end

function get_table_metadata(conn::LibPQ.Connection, table_name::String)
query = """
SELECT column_name, data_type
Expand All @@ -148,9 +148,12 @@ function get_table_metadata(conn::LibPQ.Connection, table_name::String)
"""
result = LibPQ.execute(conn, query) |> DataFrame
result[!, :current_selxn] .= 1
return select(result, 1 => :name, 2 => :type, :current_selxn)
result[!, :table_name] .= table_name
# Adjust the select statement to include the new table_name column
return select(result, 1 => :name, 2 => :type, :current_selxn, :table_name)
end


# DuckDB
function get_table_metadata(conn::DuckDB.Connection, table_name::String)
query = """
Expand All @@ -161,7 +164,9 @@ function get_table_metadata(conn::DuckDB.Connection, table_name::String)
"""
result = DuckDB.execute(conn, query) |> DataFrame
result[!, :current_selxn] .= 1
return select(result, 1 => :name, 2 => :type, :current_selxn)
result[!, :table_name] .= table_name
# Adjust the select statement to include the new table_name column
return select(result, 1 => :name, 2 => :type, :current_selxn, :table_name)
end

# MySQL
Expand All @@ -177,7 +182,9 @@ function get_table_metadata(conn::MySQL.Connection, table_name::String)
result = DBInterface.execute(conn, query) |> DataFrame
result[!, :DATA_TYPE] = map(x -> String(x), result.DATA_TYPE)
result[!, :current_selxn] .= 1
return select(result, :COLUMN_NAME => :name, :DATA_TYPE => :type, :current_selxn)
result[!, :table_name] .= table_name
# Adjust the select statement to include the new table_name column
return select(result, :COLUMN_NAME => :name, 2 => :type, :current_selxn, :table_name)
end

# MSSQL
Expand All @@ -193,8 +200,10 @@ function get_table_metadata(conn::ODBC.Connection, table_name::String)
result = DBInterface.execute(conn, query) |> DataFrame
#result[!, :DATA_TYPE] = map(x -> String(x), result.DATA_TYPE)
result[!, :current_selxn] .= 1
return select(result, :column_name => :name, :data_type => :type, :current_selxn)
end
result[!, :table_name] .= table_name
# Adjust the select statement to include the new table_name column
return select(result, :column_name => :name, :data_type => :type, :current_selxn, :table_name,)
end

# ClickHouse
function get_table_metadata(conn::ClickHouse.ClickHouseSock, table_name::String)
Expand All @@ -209,8 +218,10 @@ function get_table_metadata(conn::ClickHouse.ClickHouseSock, table_name::String)
result = ClickHouse.select_df(conn,query)

result[!, :current_selxn] .= 1
return select(result, :column_name => :name, :data_type => :type, :current_selxn)
end
result[!, :table_name] .= table_name
# Adjust the select statement to include the new table_name column
return select(result, 1 => :name, 2 => :type, :current_selxn, :table_name)
end

function db_table(db, table::Symbol)
table_name = string(table)
Expand Down
11 changes: 10 additions & 1 deletion src/db_parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ function parse_tidy_db(exprs, metadata::DataFrame)
exprs_iterable = isa(exprs, Tuple) ? collect(exprs) : exprs

for expr in exprs_iterable
if occursin(".", string(expr))
push!(included_columns, string(expr))
continue
end
is_excluded = isa(expr, Expr) && expr.head == :call && expr.args[1] == :(!)
actual_expr = is_excluded ? expr.args[2] : expr

Expand Down Expand Up @@ -66,6 +70,10 @@ function parse_tidy_db(exprs, metadata::DataFrame)
end
elseif isa(actual_expr, Symbol) || isa(actual_expr, String)
# Handle single column name
if occursin(".", string(actual_expr))
push!(included_columns, string(actual_expr))
continue
end
col_name = isa(actual_expr, Symbol) ? string(actual_expr) : actual_expr
if is_excluded
push!(excluded_columns, col_name)
Expand All @@ -92,10 +100,11 @@ function parse_tidy_db(exprs, metadata::DataFrame)
else
included_columns = setdiff(included_columns, excluded_columns)
end

return included_columns
end


function parse_if_else(expr)
transformed_expr = MacroTools.postwalk(expr) do x
# Ensure we're dealing with an Expr object and it's a call to if_else
Expand Down
Loading