Skip to content

Commit

Permalink
Merge pull request #90 from TidierOrg/wildcard-file-path-bugfix
Browse files Browse the repository at this point in the history
fix bug when reading w wildcard
  • Loading branch information
drizk1 authored Dec 12, 2024
2 parents 438f198 + d7418d4 commit 3a90366
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 12 deletions.
2 changes: 1 addition & 1 deletion docs/examples/UserGuide/getting_started.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
# - `db_table` has two required arguments: `connection` and `table`
# - `table` can be a table name on a database or a path/url to a file to read. When passing `db_table` a path or url, the table is not copied into memory.
# - Of note, `db_table` only supports direct file paths to a table. It does not support database file paths such as `dbname.duckdb` or `dbname.sqlite`. Such files must be used with `connect` first.
# - With DuckDB and ClickHouse, if you have a folder of multiple files to read, you can use `*` to read in all files matching the pattern.
# - With DuckDB and ClickHouse, if you have a folder of multiple files to read, you can use `*` to read in all files matching the pattern, with an optional `alias` argument that sets the name the combined data is referred to by.
# - For example, the below would read all files that end in `.csv` in the given folder.
# ```
# db_table(db, "folder/path/*.csv")
Expand Down
31 changes: 21 additions & 10 deletions src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ function finalize_query(sqlquery::SQLQuery)
end

# DuckDB
function get_table_metadata(conn::Union{DuckDB.DB, DuckDB.Connection}, table_name::String)
function get_table_metadata(conn::Union{DuckDB.DB, DuckDB.Connection}, table_name::String; alias::String="")
set_sql_mode(duckdb());
if endswith(table_name, ".geoparquet'")
query =
Expand All @@ -203,12 +203,18 @@ function get_table_metadata(conn::Union{DuckDB.DB, DuckDB.Connection}, table_nam
end
result = DuckDB.execute(conn, query) |> DataFrame
result[!, :current_selxn] .= 1
table_name = if occursin(r"[:/\\]", table_name)
split(basename(table_name), '.')[1]
elseif occursin(".", table_name)
split(basename(table_name), '.')[end]
if occursin("*" , table_name)
if alias != ""
table_name = alias
else
table_name = "data"
end
elseif occursin(r"[:/\\]", table_name)
table_name = split(basename(table_name), '.')[1]
elseif occursin(".", table_name)
table_name = split(basename(table_name), '.')[end]
else
table_name
table_name = table_name
end
if occursin("-" , table_name)
table_name = replace(table_name, "-" => "_")
Expand All @@ -227,7 +233,7 @@ end
"""
$docstring_db_table
"""
function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, delta::Bool=false)
function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, delta::Bool=false, alias::String="")
table_name = string(table)

if current_sql_mode[] == sqlite()
Expand All @@ -249,7 +255,7 @@ function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, de
metadata = get_table_metadata(db, table_name2)
elseif occursin(r"[:/\\]", table_name)
table_name2 = "'$table_name'"
metadata = get_table_metadata(db, table_name2)
metadata = get_table_metadata(db, table_name2; alias = alias)
else
metadata = get_table_metadata(db, table_name)
end
Expand All @@ -273,10 +279,15 @@ function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, de
elseif delta
"delta_scan('$table_name')"
elseif occursin(r"[:/\\]", table_name) && !(iceberg || delta) && !startswith(table_name, "read")
if occursin(r"\*", table_name)
alias = alias == "" ? "data" : alias
else
alias = (split(basename(table_name), '.')[1])
end
name = if occursin(".geoparquet", table_name)
"read_parquet('$table_name') AS $(split(basename(table_name), '.')[1]) "
"read_parquet('$table_name') AS $alias "
else
"'$table_name' AS $(split(basename(table_name), '.')[1]) "
"'$table_name' AS $alias "
end
formatted_table_name = begin
parts = split(name, " AS ")
Expand Down
2 changes: 1 addition & 1 deletion src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1167,7 +1167,7 @@ name it will not copy it to memory, but rather read directly from the file. `db
- `db_table(db, "Path/to/testing_files/*.parquet")`
- `delta`: must be true to read delta files
- `iceberg`: must be true to read iceberg files
- `alias`: optional argument, used when the file path contains a `*` wildcard, that lets the user set an alias for the data being read in. If empty, the table is referred to as `data`.
# Example
```julia
Expand Down

0 comments on commit 3a90366

Please sign in to comment.