Skip to content

Commit

Permalink
adds docstring for aggregate functions
Browse files Browse the repository at this point in the history
  • Loading branch information
drizk1 committed Jan 22, 2025
1 parent 9153ef1 commit 00c483e
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 220 deletions.
113 changes: 97 additions & 16 deletions src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,13 @@ julia> @chain db_table(db, :df_mem) begin
8 │ AH aa 3 0.8 -1.33393 -1.2
9 │ AI bb 4 0.9 -1.33393 -1.2
10 │ AJ aa 5 1.0 -1.33393 -1.2
julia> @chain db_table(db, :df_mem) begin
@mutate(value2 = sum(value),
_order = desc([:value, :percent]),
_frame = 2);
@collect
end;
```
"""

Expand Down Expand Up @@ -1525,34 +1532,22 @@ julia> @chain t(df1_table) @union(df2_table) @collect
5 │ 5 50
6 │ 6 60
julia> query = @chain t(df2_table) @filter(value == 50);
julia> @chain t(df1_table) begin
@union(t(query))
@union("df1", all = false)
@collect
end
4×2 DataFrame
3×2 DataFrame
Row │ id value
│ Int64 Int64
─────┼──────────────
1 │ 1 10
2 │ 2 20
3 │ 3 30
4 │ 5 50
julia> @chain t(df1_table) begin
@union(t(df1_table))
@union("df1", all = true)
@collect
end
3×2 DataFrame
Row │ id value
│ Int64 Int64
─────┼──────────────
1 │ 1 10
2 │ 2 20
3 │ 3 30
julia> @chain t(df1_table) @union(df1_table, all = true) @collect
6×2 DataFrame
Row │ id value
│ Int64 Int64
Expand All @@ -1563,6 +1558,22 @@ julia> @chain t(df1_table) @union(df1_table, all = true) @collect
4 │ 1 10
5 │ 2 20
6 │ 3 30
julia> query = @chain t(df2_table) @filter(value == 50);
julia> @chain t(df1_table) begin
@mutate(id = id + 5)
@filter(id > 6)
@union(t(query))
@collect
end
3×2 DataFrame
Row │ id value
│ Int64 Int64
─────┼──────────────
1 │ 7 20
2 │ 8 30
3 │ 5 50
```
"""

Expand Down Expand Up @@ -1843,7 +1854,7 @@ julia> @chain db_table(db, :df_mem) begin
10 │ 1.0 aa 5 AJ
julia> @chain db_table(db, :df_mem) begin
@relocate([percent, groups], before = id)
@relocate([:percent, :groups], before = id)
@collect
end
10×4 DataFrame
Expand All @@ -1861,4 +1872,74 @@ julia> @chain db_table(db, :df_mem) begin
9 │ 0.9 bb AI 4
10 │ 1.0 aa AJ 5
```
"""


const docstring_aggregate_functions =
"""
Aggregate Functions
Nearly all aggregate functions from any database are supported in both `@summarize` and `@mutate`.
With `@summarize`, any aggregate function available on a SQL backend can be used as it is in SQL, with the same syntax (`'` should be replaced with `"`).
`@mutate` supports them as well; however, unless listed below, the function call must be wrapped with `agg()`:
- `maximum`, `minimum`, `mean`, `std`
The list of DuckDB aggregate functions and their syntax can be found [here](https://duckdb.org/docs/sql/functions/aggregates.html#general-aggregate-functions)
Please refer to your backend documentation for a complete list with syntax, but open an issue on TidierDB if you run into roadblocks.
# Examples
```jldoctest
julia> df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9],
groups = [i % 2 == 0 ? "aa" : "bb" for i in 1:10],
value1 = [i - 4^1 for i in -4.5:4.5],
value2 = [i + 2^i for i in 1:10],
percent = 0.1:0.1:1.0);
julia> db = connect(duckdb());
julia> copy_to(db, df, "df_agg");
julia> @chain db_table(db, :df_agg) begin
@summarise(
r2 = regr_r2(value2, value1),
across(contains("value"), median),
_by = groups)
@arrange(groups)
@collect
end
2×4 DataFrame
Row │ groups r2 value1_median value2_median
│ String Float64 Float64 Float64
─────┼────────────────────────────────────────────────
1 │ aa 0.700161 -3.5 70.0
2 │ bb 0.703783 -4.5 37.0
julia> @chain db_table(db, :df_agg) begin
@mutate(
slope = agg(regr_slope(value1, value2)),
var = agg(var_samp(value2)),
std = std(value2), # since this is in the list above, it does not get wrapped in `agg`
_by = groups
)
@mutate(var = round(var))
@select !percent
@arrange(groups)
@collect
end
10×7 DataFrame
Row │ id groups value1 value2 slope var std
│ String String Float64 Int64 Float64 Float64 Float64
─────┼────────────────────────────────────────────────────────────────
1 │ AB aa -7.5 6 0.00608835 188885.0 434.609
2 │ AD aa -5.5 20 0.00608835 188885.0 434.609
3 │ AF aa -3.5 70 0.00608835 188885.0 434.609
4 │ AH aa -1.5 264 0.00608835 188885.0 434.609
5 │ AJ aa 0.5 1034 0.00608835 188885.0 434.609
6 │ AA bb -8.5 3 0.0121342 47799.0 218.629
7 │ AC bb -6.5 11 0.0121342 47799.0 218.629
8 │ AE bb -4.5 37 0.0121342 47799.0 218.629
9 │ AG bb -2.5 135 0.0121342 47799.0 218.629
10 │ AI bb -0.5 521 0.0121342 47799.0 218.629
```
"""
21 changes: 0 additions & 21 deletions src/parsing_athena.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,6 @@ function expr_to_sql_trino(expr, sq; from_summarize::Bool)
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end
elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg
args = x.args[2:end] # Capture all arguments to agg
if from_summarize
Expand Down
21 changes: 0 additions & 21 deletions src/parsing_clickhouse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,6 @@ function expr_to_sql_clickhouse(expr, sq; from_summarize::Bool)
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end
elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg
args = x.args[2:end] # Capture all arguments to agg
if from_summarize
Expand Down
28 changes: 7 additions & 21 deletions src/parsing_duckdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,27 +64,7 @@ function expr_to_sql_duckdb(expr, sq; from_summarize::Bool)
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end

elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg
args = x.args[2:end] # Capture all arguments to agg
if from_summarize
Expand Down Expand Up @@ -192,3 +172,9 @@ function expr_to_sql_duckdb(expr, sq; from_summarize::Bool)
return x
end
end

"""
$docstring_aggregate_functions
"""
function aggregate_fxns()
    # No logic here: the method is a placeholder that carries the docstring above.
    return nothing
end
21 changes: 0 additions & 21 deletions src/parsing_gbq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,6 @@ function expr_to_sql_gbq(expr, sq; from_summarize::Bool)
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end
elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg
args = x.args[2:end] # Capture all arguments to agg
if from_summarize
Expand Down
21 changes: 0 additions & 21 deletions src/parsing_mssql.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,6 @@ function expr_to_sql_mssql(expr, sq; from_summarize::Bool)
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end
elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg
args = x.args[2:end] # Capture all arguments to agg
if from_summarize
Expand Down
21 changes: 0 additions & 21 deletions src/parsing_mysql.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,6 @@ function expr_to_sql_mysql(expr, sq; from_summarize::Bool)
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end
elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg
args = x.args[2:end] # Capture all arguments to agg
if from_summarize
Expand Down
31 changes: 5 additions & 26 deletions src/parsing_oracle.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,36 +62,15 @@ function expr_to_sql_oracle(expr, sq; from_summarize::Bool)
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, Expr(:call, :agg, args...))
elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg
args = x.args[2:end] # Capture all arguments to agg
if from_summarize
return error("agg is only needed with aggregate functions in @mutate")
else
window_clause = construct_window_clause(sq)

# Create the SQL string representation of the aggregate function call
arg_str = join(map(string, args), ", ") # Join arguments into a string
str = "agg($(arg_str))" # Construct the function call string
# Create the SQL string representation of the agg function call
arg_str = join(map(string, args), ", ")
str = "$(arg_str)"
return "$(str) $(window_clause)"
end
elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call
Expand Down
Loading

0 comments on commit 00c483e

Please sign in to comment.