adds aggregate fxns docstrings, some more percent claimed

TidierOrg · Jan 22, 2025 · 8a0404d · 8a0404d
1 parent 00c483e
commit 8a0404d
Show file tree

Hide file tree

Showing 6 changed files with 17 additions and 14 deletions.
diff --git a/README.md b/README.md
@@ -45,7 +45,7 @@ TidierDB.jl currently supports the following top-level macros:
 | **Helper Functions**             | `across`, `desc`, `if_else`, `case_when`, `n`, `starts_with`, `ends_with`, `contains`, `as_float`, `as_integer`, `as_string`, `is_missing`, `missing_if`, `replace_missing` |
 | **TidierStrings.jl Functions** | `str_detect`, `str_replace`, `str_replace_all`, `str_remove_all`, `str_remove`                                                                                               |
 | **TidierDates.jl Functions**   | `year`, `month`, `day`, `hour`, `min`, `second`, `floor_date`, `difftime`, `mdy`, `ymd`, `dmy`                                                                                                    |
-| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, `cor`, `cov`, `var`, all aggregate sql fxns
+| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, and nearly all SQL aggregate functions |
 
 `@summarize` supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work.
 `@mutate` supports all builtin SQL functions as well.

diff --git a/docs/src/index.md b/docs/src/index.md
@@ -39,7 +39,7 @@ TidierDB.jl currently supports:
 | **Helper Functions**             | `across`, `desc`, `if_else`, `case_when`, `n`, `starts_with`, `ends_with`, `contains`, `as_float`, `as_integer`, `as_string`, `is_missing`, `missing_if`, `replace_missing` |
 | **TidierStrings.jl Functions** | `str_detect`, `str_replace`, `str_replace_all`, `str_remove_all`, `str_remove`                                                                                               |
 | **TidierDates.jl Functions**   | `year`, `month`, `day`, `hour`, `min`, `second`, `floor_date`, `difftime`, `mdy`, `ymd`, `dmy`                                                                                                    |
-| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, `cor`, `cov`, `var`, all aggregate sql fxns
+| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, and nearly all SQL aggregate functions |
 
 `@summarize` supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work.   
 `@mutate` supports all builtin SQL functions as well.                                                                                                 

diff --git a/src/docstrings.jl b/src/docstrings.jl
@@ -1721,7 +1721,7 @@ Create a view from a SQL query. Currently supports DuckDB, MySQL, GBQ, Postgres
 # Arguments
 - `sql_query`: The SQL query to create a view from.
 - `name`: The name of the view to create.
-- `replace`: defaults to true if view should be replaced
+- `replace`: Boolean value that defaults to `false` so as not to replace existing views
 
 # Examples
 ```jldoctest
@@ -1731,9 +1731,11 @@ julia> df = DataFrame(id = [1, 2, 3], value = [10, 20, 30]);
 
 julia> copy_to(db, df, "df1");
 
-julia> @chain db_table(db, "df1") @create_view(viewer);
+julia> @chain db_table(db, "df1") @create_view(viewer); # will not overwrite existing view
 
 julia> db_table(db, "viewer");
+
+julia> @chain db_table(db, "df1") @create_view(viewer, true); # will overwrite existing view
 ```
 """
 
@@ -1883,8 +1885,8 @@ Nearly all aggregate functions from any database are supported both `@summarize`
 
 With `@summarize`, any aggregate function available on a SQL backend can be used as it is in SQL, with the same syntax (`'` should be replaced with `"`)
 
-`@mutate` supports them as well, however, unless listed below, the function call muset be wrapped with `agg()`
-       - `maximum`, `minimum`, `mean`, `std`
+`@mutate` supports them as well, however, unless listed below, the function call must be wrapped with `agg()`
+       - `maximum`, `minimum`, `mean`, `std`, `sum`, `cumsum`
 
 The list of DuckDB aggregate functions and their syntax can be found [here](https://duckdb.org/docs/sql/functions/aggregates.html#general-aggregate-functions)
 Please refer to your backend documentation for a complete list with syntax, but open an issue on TidierDB if you run into roadblocks.  

diff --git a/src/parsing_duckdb.jl b/src/parsing_duckdb.jl
@@ -173,8 +173,11 @@ function expr_to_sql_duckdb(expr, sq; from_summarize::Bool)
     end
 end
 
+# This is to get the aggregate functions docstring.
+# COV_EXCL_START
 """
 $docstring_aggregate_functions
 """
-function aggregate_fxns() 
-end
+function aggregate_functions() 
+end
+# COV_EXCL_STOP
diff --git a/src/view_compute.jl b/src/view_compute.jl
@@ -10,7 +10,7 @@ end
 """
 $docstring_create_view
 """
-macro create_view(sqlquery, name, replace = true)
+macro create_view(sqlquery, name, replace = false)
     if replace == true 
         sql_cr_or_replace = "CREATE OR REPLACE VIEW $name AS "
     elseif replace == false
@@ -20,8 +20,6 @@ macro create_view(sqlquery, name, replace = true)
         sq = $(esc(sqlquery))
         if current_sql_mode[] == duckdb()
             final_compute($(esc(sqlquery)), duckdb, $sql_cr_or_replace)
-
-
         elseif current_sql_mode[] == postgres()
             final_compute($(esc(sqlquery)), postgres, $sql_cr_or_replace)
         elseif current_sql_mode[] == gbq()

diff --git a/test/comp_tests.jl b/test/comp_tests.jl
@@ -152,8 +152,8 @@
         #mutating after summarizing and with cumsum
         TDF_4 = @chain test_df @group_by(groups) @summarize(across(value,(mean, minimum))) @mutate(new = value_mean - value_minimum)
         TDB_4 = @chain DB.t(test_db) DB.@group_by(groups) DB.@summarize(across(value, (mean, minimum))) DB.@mutate(new = value_mean - value_minimum) DB.@collect
-        #TDF_5 = @chain test_df @group_by(groups) @mutate(value = cumsum(value)) @ungroup() @arrange(id)
-        #TDB_5 = @chain DB.t(test_db) DB.@mutate(value = cumsum(value), _by = groups)  DB.@collect() @arrange(id)
+        TDF_5 = @chain test_df @group_by(groups) @mutate(value = cumsum(value)) @ungroup() 
+        TDB_5 = @chain DB.t(test_db) DB.@mutate(value = cumsum(value), _order = id, _by = groups)  DB.@collect() @arrange(id) 
         TDF_6 = @chain test_df @mutate(id = lowercase(id), groups = uppercase(groups))
         TDB_6 = @chain DB.t(test_db)  DB.@mutate(id = lower(id), groups = upper(groups)) DB.@collect
         # mutating with agg function across groups, then filtering
@@ -185,7 +185,7 @@
         @test all(isequal.(Array(TDF_2), Array(TDB_2)))
         @test all(isequal.(Array(TDF_3), Array(TDB_3)))
         @test all(isequal.(Array(TDF_4), Array(TDB_4)))
-       # @test all(isequal.(Array(TDF_5), Array(TDB_5)))
+        @test all(isequal.(Array(TDF_5), Array(TDB_5)))
         @test all(isequal.(Array(TDF_6), Array(TDB_6)))
         @test all(isequal.(Array(TDF_7), Array(TDB_7)))
         @test all(isequal.(Array(TDF_8), Array(TDB_8)))