diff --git a/latest/examples/generated/UserGuide/getting_started/index.html b/latest/examples/generated/UserGuide/getting_started/index.html
index 51c29fd..6961633 100644
--- a/latest/examples/generated/UserGuide/getting_started/index.html
+++ b/latest/examples/generated/UserGuide/getting_started/index.html
@@ -708,6 +708,8 @@ <h2 id="minimizing-compute-costs">Minimizing Compute Costs<a class="headerlink"
 <span class="k">end</span>
 </code></pre></div>
 <hr />
+<p>Tip: Setting <code>t(table) = from_query(table)</code> will save some keystrokes. This means that, after saving the results of <code>db_table</code>, you can start all chains/refer to the data with <code>t(table)</code>.</p>
+<hr />
 <p><em>This page was generated using <a href="https://github.com/fredrikekre/Literate.jl">Literate.jl</a>.</em></p>
 
 
diff --git a/latest/examples/generated/UserGuide/ibis_comp/index.html b/latest/examples/generated/UserGuide/ibis_comp/index.html
index 316d669..d652664 100644
--- a/latest/examples/generated/UserGuide/ibis_comp/index.html
+++ b/latest/examples/generated/UserGuide/ibis_comp/index.html
@@ -778,7 +778,7 @@ <h2 id="loading-data">Loading Data<a class="headerlink" href="#loading-data" tit
 <p><a id='Previewing-the-data'></a></p>
 <p><a id='Previewing-the-data-1'></a></p>
 <h2 id="previewing-the-data">Previewing the data<a class="headerlink" href="#previewing-the-data" title="Permanent link">¤</a></h2>
-<p>TidierDB and Ibis use <code>head</code>/<code>@head</code> to preview the first rows of a dataset.</p>
+<p>TidierDB and Ibis use <code>head</code>/<code>@head</code> to preview the first rows of a dataset.</p><p>Ibis</p>
 <div class="highlight"><pre><span></span><code><span class="n">mtcars</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">6</span><span class="p">)</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
@@ -794,6 +794,7 @@ <h2 id="previewing-the-data">Previewing the data<a class="headerlink" href="#pre
 │ Valiant           │    18.1 │     6 │   225.0 │   105 │    2.76 │   3.460 │   20.22 │     1 │     0 │     3 │     1 │
 └───────────────────┴─────────┴───────┴─────────┴───────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┘
 </code></pre></div>
+<p>TidierDB</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@chain</span><span class="w"> </span><span class="n">t</span><span class="p">(</span><span class="n">mtcars</span><span class="p">)</span><span class="w"> </span><span class="nd">@head</span><span class="p">(</span><span class="mi">6</span><span class="p">)</span><span class="w"> </span><span class="nd">@collect</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>6×12 DataFrame
@@ -810,7 +811,7 @@ <h2 id="previewing-the-data">Previewing the data<a class="headerlink" href="#pre
 <p><a id='Filtering'></a></p>
 <p><a id='Filtering-1'></a></p>
 <h2 id="filtering">Filtering<a class="headerlink" href="#filtering" title="Permanent link">¤</a></h2>
-<p>The example below demonstrates how to filter using multiple criteria in both Ibis and TidierData</p>
+<p>The example below demonstrates how to filter using multiple criteria in both Ibis and TidierDB.</p><p>Ibis</p>
 <div class="highlight"><pre><span></span><code><span class="n">mtcars</span><span class="o">.</span><span class="n">filter</span><span class="p">(((</span><span class="n">_</span><span class="o">.</span><span class="n">mpg</span> <span class="o">&gt;</span> <span class="mi">22</span><span class="p">)</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">_</span><span class="o">.</span><span class="n">drat</span> <span class="o">&gt;</span> <span class="mi">4</span><span class="p">)</span> <span class="o">|</span> <span class="p">(</span><span class="n">_</span><span class="o">.</span><span class="n">hp</span> <span class="o">==</span> <span class="mi">113</span><span class="p">)))</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>┏━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
@@ -846,7 +847,7 @@ <h2 id="filtering">Filtering<a class="headerlink" href="#filtering" title="Perma
 <p><a id='Creating-new-columns'></a></p>
 <p><a id='Creating-new-columns-1'></a></p>
 <h2 id="creating-new-columns">Creating new columns<a class="headerlink" href="#creating-new-columns" title="Permanent link">¤</a></h2>
-<p>Both TidierDB and Ibis use <code>mutate</code>/<code>@mutate</code> to add new columns</p>
+<p>Both TidierDB and Ibis use <code>mutate</code>/<code>@mutate</code> to add new columns.</p><p>Ibis</p>
 <div class="highlight"><pre><span></span><code><span class="p">(</span>
    <span class="n">mtcars</span>
         <span class="o">.</span><span class="n">mutate</span><span class="p">(</span><span class="n">kpg</span> <span class="o">=</span> <span class="n">_</span><span class="o">.</span><span class="n">mpg</span> <span class="o">*</span> <span class="mf">1.61</span><span class="p">)</span>
@@ -871,6 +872,7 @@ <h2 id="creating-new-columns">Creating new columns<a class="headerlink" href="#c
 │ …                 │       … │
 └───────────────────┴─────────┘
 </code></pre></div>
+<p>TidierDB</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@chain</span><span class="w"> </span><span class="n">t</span><span class="p">(</span><span class="n">mtcars</span><span class="p">)</span><span class="w"> </span><span class="k">begin</span>
 <span class="w">       </span><span class="nd">@mutate</span><span class="p">(</span><span class="n">kpg</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">mpg</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mf">1.61</span><span class="p">)</span>
 <span class="w">       </span><span class="nd">@select</span><span class="p">(</span><span class="n">model</span><span class="p">,</span><span class="w"> </span><span class="n">kpg</span><span class="p">)</span>
@@ -899,7 +901,7 @@ <h2 id="creating-new-columns">Creating new columns<a class="headerlink" href="#c
 <p><a id='Sorting-columns'></a></p>
 <p><a id='Sorting-columns-1'></a></p>
 <h2 id="sorting-columns">Sorting columns<a class="headerlink" href="#sorting-columns" title="Permanent link">¤</a></h2>
-<p>Ibis uses <code>order_by</code> similar to SQLs <code>ORDER BY</code></p>
+<p>Ibis uses <code>order_by</code>, similar to SQL's <code>ORDER BY</code>.</p><p>Ibis</p>
 <div class="highlight"><pre><span></span><code><span class="n">mtcars</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="n">_</span><span class="o">.</span><span class="n">mpg</span><span class="p">)</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
@@ -920,7 +922,7 @@ <h2 id="sorting-columns">Sorting columns<a class="headerlink" href="#sorting-col
 │ …                   │       … │     … │       … │     … │       … │       … │       … │     … │     … │     … │     … │
 └─────────────────────┴─────────┴───────┴─────────┴───────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┘
 </code></pre></div>
-<p>While TidierDB uses <code>@arrange</code> like TidierData.jl</p>
+<p>TidierDB, by contrast, uses <code>@arrange</code>, like TidierData.jl.</p><p>TidierDB</p>
 <div class="highlight"><pre><span></span><code>@chain t(mtcars) @arrange(mpg) @collect
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>32×12 DataFrame
@@ -945,7 +947,7 @@ <h2 id="sorting-columns">Sorting columns<a class="headerlink" href="#sorting-col
 <p><a id='Selecting-columns'></a></p>
 <p><a id='Selecting-columns-1'></a></p>
 <h2 id="selecting-columns">Selecting columns<a class="headerlink" href="#selecting-columns" title="Permanent link">¤</a></h2>
-<p>In Ibis, columns must be prefixed with the table name, or in this case <code>_</code>, or they can be given as a string. Finally to using helper functions like <code>startswith</code> requires importing selectors as above.</p>
+<p>In Ibis, columns must be prefixed with the table name, or in this case <code>_</code>, or they can be given as a string. Finally, using helper functions like <code>startswith</code> requires importing selectors as above.</p><p>Ibis</p>
 <div class="highlight"><pre><span></span><code>mtcars.select(s.startswith(&quot;m&quot;), &quot;drat&quot;, _.wt)
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
@@ -966,11 +968,10 @@ <h2 id="selecting-columns">Selecting columns<a class="headerlink" href="#selecti
 │ …                 │       … │       … │       … │
 └───────────────────┴─────────┴─────────┴─────────┘
 </code></pre></div>
-<p>TidierDB does not require names to be prefixed and, like TidierData, tidy column selection with <code>starts_with</code>, <code>ends_with</code>, and <code>contains</code> is supported at base. TidierDB also supports providing column names as strings, although this would only be needed in the setting of renaming a column with a space in it.</p>
+<p>TidierDB does not require names to be prefixed and, like TidierData, tidy column selection with <code>starts_with</code>, <code>ends_with</code>, and <code>contains</code> is supported at base. TidierDB also supports providing column names as strings, although this would only be needed in the setting of renaming a column with a space in it.</p><p>TidierDB</p>
 <div class="highlight"><pre><span></span><code>@chain t(mtcars) @select(starts_with(&quot;m&quot;), &quot;drat&quot;, wt) @collect
 </code></pre></div>
-<div class="highlight"><pre><span></span><code>32×2 DataFrame
-32×4 DataFrame
+<div class="highlight"><pre><span></span><code>32×4 DataFrame
  Row │ model              mpg       drat      wt
      │ String?            Float64?  Float64?  Float64?
 ─────┼─────────────────────────────────────────────────
@@ -992,7 +993,7 @@ <h2 id="selecting-columns">Selecting columns<a class="headerlink" href="#selecti
 <p><a id='Multi-step-queries-and-summarizing'></a></p>
 <p><a id='Multi-step-queries-and-summarizing-1'></a></p>
 <h2 id="multi-step-queries-and-summarizing">Multi step queries and summarizing<a class="headerlink" href="#multi-step-queries-and-summarizing" title="Permanent link">¤</a></h2>
-<p>Aggregating data is done with <code>aggregate</code> in ibis and <code>@summarize</code> in TidierDB. There is a slight difference in grouping data. Ibis uses <code>by =</code> within the <code>aggregate</code> call vs TidierDB adheres to <code>@group_by</code> convention</p>
+<p>Aggregating data is done with <code>aggregate</code> in Ibis and <code>@summarize</code> in TidierDB. To group data, Ibis uses <code>by =</code> within the <code>aggregate</code> call, whereas TidierDB adheres to the <code>@group_by</code> convention.</p><p>Ibis</p>
 <div class="highlight"><pre><span></span><code>mtcars.aggregate(
     total_hp=_.hp.sum(),
     avg_hp=_.hp.mean(),
@@ -1009,7 +1010,7 @@ <h2 id="multi-step-queries-and-summarizing">Multi step queries and summarizing<a
 │     4 │      909 │  82.636364 │
 └───────┴──────────┴────────────┘
 </code></pre></div>
-<p>In TidierDB, <code>@filter</code> will automatically determine whether the criteria belong in a WHERE or HAVING in SQL clause.</p>
+<p>In TidierDB, <code>@filter</code> will automatically determine whether the criteria belong in a <code>WHERE</code> or <code>HAVING</code> SQL clause.</p><p>TidierDB</p>
 <div class="highlight"><pre><span></span><code>@chain t(mtcars) begin
     @group_by(cyl)
     @summarize(total_hp = sum(hp),
@@ -1028,7 +1029,7 @@ <h2 id="multi-step-queries-and-summarizing">Multi step queries and summarizing<a
 <p><a id='Renaming-columns'></a></p>
 <p><a id='Renaming-columns-1'></a></p>
 <h2 id="renaming-columns">Renaming columns<a class="headerlink" href="#renaming-columns" title="Permanent link">¤</a></h2>
-<p>Both tools use <code>rename</code>/@rename to rename columns</p>
+<p>Both tools use <code>rename</code>/<code>@rename</code> to rename columns.</p><p>Ibis</p>
 <div class="highlight"><pre><span></span><code><span class="n">mtcars</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">make_model</span> <span class="o">=</span> <span class="s2">&quot;model&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">_</span><span class="o">.</span><span class="n">make_model</span><span class="p">)</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>┏━━━━━━━━━━━━━━━━━━━┓
@@ -1049,6 +1050,7 @@ <h2 id="renaming-columns">Renaming columns<a class="headerlink" href="#renaming-
 │ …                 │
 └───────────────────┘
 </code></pre></div>
+<p>TidierDB</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@chain</span><span class="w"> </span><span class="n">t</span><span class="p">(</span><span class="n">mtcars</span><span class="p">)</span><span class="w"> </span><span class="nd">@rename</span><span class="p">(</span><span class="n">model_make</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">model</span><span class="p">)</span><span class="w"> </span><span class="nd">@select</span><span class="p">(</span><span class="n">model_make</span><span class="p">)</span><span class="w"> </span><span class="nd">@collect</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>32×1 DataFrame
diff --git a/latest/reference/index.html b/latest/reference/index.html
index 050c192..16704c9 100644
--- a/latest/reference/index.html
+++ b/latest/reference/index.html
@@ -721,7 +721,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="n">julia</span><span class="o">&gt;</span><span class="w"> </span><span class="n">db</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">connect</span><span class="p">(</span><span class="n">duckdb</span><span class="p">())</span>
 <span class="n">DuckDB</span><span class="o">.</span><span class="n">Connection</span><span class="p">(</span><span class="s">&quot;:memory:&quot;</span><span class="p">)</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TidierDB.jl#L351-L394' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TidierDB.jl#L351-L394' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.copy_to-Tuple{Any, Union{AbstractString, DataFrame}, String}' href='#TidierDB.copy_to-Tuple{Any, Union{AbstractString, DataFrame}, String}'>#</a>
 <strong><code>TidierDB.copy_to</code></strong> &mdash; <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="w">   </span><span class="n">copy_to</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span><span class="w"> </span><span class="n">df_or_path</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;name&quot;</span><span class="p">)</span>
@@ -739,7 +739,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">copy_to</span><span class="p">(</span><span class="n">db</span><span class="p">,</span><span class="w"> </span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;test&quot;</span><span class="p">);</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TidierDB.jl#L304-L323' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TidierDB.jl#L304-L323' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.db_table' href='#TidierDB.db_table'>#</a>
 <strong><code>TidierDB.db_table</code></strong> &mdash; <em>Function</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">db_table</span><span class="p">(</span><span class="n">database</span><span class="p">,</span><span class="w"> </span><span class="n">table_name</span><span class="p">,</span><span class="w"> </span><span class="n">athena_params</span><span class="p">,</span><span class="w"> </span><span class="n">delta</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="n">iceberg</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">false</span><span class="p">)</span>
@@ -779,7 +779,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">   </span><span class="mi">3</span><span class="w"> </span><span class="n">│</span><span class="w"> </span><span class="n">value</span><span class="w">    </span><span class="n">BIGINT</span><span class="w">               </span><span class="mi">1</span><span class="w">  </span><span class="n">df_mem</span>
 <span class="w">   </span><span class="mi">4</span><span class="w"> </span><span class="n">│</span><span class="w"> </span><span class="n">percent</span><span class="w">  </span><span class="n">DOUBLE</span><span class="w">               </span><span class="mi">1</span><span class="w">  </span><span class="n">df_mem</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="n">DuckDB</span><span class="o">.</span><span class="n">Connection</span><span class="p">(</span><span class="s">&quot;:memory:&quot;</span><span class="p">),</span><span class="w"> </span><span class="n">TidierDB</span><span class="o">.</span><span class="n">CTE</span><span class="p">[],</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="nb">nothing</span><span class="p">)</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TidierDB.jl#L194-L237' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TidierDB.jl#L194-L237' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@anti_join-NTuple{4, Any}' href='#TidierDB.@anti_join-NTuple{4, Any}'>#</a>
 <strong><code>TidierDB.@anti_join</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@anti_join</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">join_table</span><span class="p">,</span><span class="w"> </span><span class="n">new_table_col</span><span class="p">,</span><span class="w"> </span><span class="n">orignal_table_col</span><span class="p">)</span>
@@ -822,7 +822,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ AH       aa            3       0.8</span>
 <span class="go">   5 │ AJ       aa            5       1.0</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/joins_sq.jl#L251-L296' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/joins_sq.jl#L251-L296' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@arrange-Tuple{Any, Vararg{Any}}' href='#TidierDB.@arrange-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@arrange</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@arrange</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="o">...</span><span class="p">)</span>
@@ -862,7 +862,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ AJ       aa            5       1.0</span>
 <span class="go">  10 │ AE       bb            5       0.5</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L120-L159' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L120-L159' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@collect' href='#TidierDB.@collect'>#</a>
 <strong><code>TidierDB.@collect</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@collect</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">stream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">false</span><span class="p">)</span>
@@ -899,7 +899,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">   </span><span class="mi">9</span><span class="w"> </span><span class="n">│</span><span class="w"> </span><span class="n">AI</span><span class="w">       </span><span class="n">bb</span><span class="w">            </span><span class="mi">4</span><span class="w">       </span><span class="mf">0.9</span>
 <span class="w">  </span><span class="mi">10</span><span class="w"> </span><span class="n">│</span><span class="w"> </span><span class="n">AJ</span><span class="w">       </span><span class="n">aa</span><span class="w">            </span><span class="mi">5</span><span class="w">       </span><span class="mf">1.0</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L721-L756' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L721-L756' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@count-Tuple{Any, Vararg{Any}}' href='#TidierDB.@count-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@count</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@count</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="o">...</span><span class="p">)</span>
@@ -932,7 +932,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ aa            5</span>
 <span class="go">   2 │ bb            5</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L448-L480' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L448-L480' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@distinct-Tuple{Any, Vararg{Any}}' href='#TidierDB.@distinct-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@distinct</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@distinct</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="o">...</span><span class="p">)</span>
@@ -985,7 +985,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ AI       bb            4       0.9</span>
 <span class="go">  10 │ AJ       aa            5       1.0</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L327-L383' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L327-L383' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@filter-Tuple{Any, Vararg{Any}}' href='#TidierDB.@filter-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@filter</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@filter</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">conditions</span><span class="o">...</span><span class="p">)</span>
@@ -1041,7 +1041,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ aa            0.6</span>
 <span class="go">   2 │ bb            0.5</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L38-L93' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L38-L93' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@full_join-NTuple{4, Any}' href='#TidierDB.@full_join-NTuple{4, Any}'>#</a>
 <strong><code>TidierDB.@full_join</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@inner_join</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">join_table</span><span class="p">,</span><span class="w"> </span><span class="n">new_table_col</span><span class="p">,</span><span class="w"> </span><span class="n">orignal_table_col</span><span class="p">)</span>
@@ -1091,7 +1091,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">  11 │ missing  missing  missing  missing    AK       Y              68</span>
 <span class="go">  12 │ missing  missing  missing  missing    AM       X              74</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/joins_sq.jl#L155-L207' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/joins_sq.jl#L155-L207' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@group_by-Tuple{Any, Vararg{Any}}' href='#TidierDB.@group_by-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@group_by</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@group_by</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="o">...</span><span class="p">)</span>
@@ -1124,7 +1124,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ aa</span>
 <span class="go">   2 │ bb</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L288-L320' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L288-L320' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@head-Tuple{Any, Any}' href='#TidierDB.@head-Tuple{Any, Any}'>#</a>
 <strong><code>TidierDB.@head</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@head</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">value</span><span class="p">)</span>
@@ -1155,7 +1155,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">─────┼────────────────────────────────────</span>
 <span class="go">   1 │ AA       bb            1       0.1</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L761-L792' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L761-L792' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@inner_join-NTuple{4, Any}' href='#TidierDB.@inner_join-NTuple{4, Any}'>#</a>
 <strong><code>TidierDB.@inner_join</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@inner_join</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">join_table</span><span class="p">,</span><span class="w"> </span><span class="n">new_table_col</span><span class="p">,</span><span class="w"> </span><span class="n">orignal_table_col</span><span class="p">)</span>
@@ -1198,7 +1198,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ AG       bb            2       0.7  AG       Y             83</span>
 <span class="go">   5 │ AI       bb            4       0.9  AI       X             95</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/joins_sq.jl#L107-L152' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/joins_sq.jl#L107-L152' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@interpolate-Tuple' href='#TidierDB.@interpolate-Tuple'>#</a>
 <strong><code>TidierDB.@interpolate</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@interpolate</span><span class="p">(</span><span class="n">args</span><span class="o">...</span><span class="p">)</span>
@@ -1246,7 +1246,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="n">─────┼───────────────────────────</span>
 <span class="w">   </span><span class="mi">1</span><span class="w"> </span><span class="n">│</span><span class="w"> </span><span class="n">AA</span><span class="w">            </span><span class="mi">1</span><span class="w">       </span><span class="mf">0.1</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/structs.jl#L60-L104' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/structs.jl#L60-L104' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@left_join-NTuple{4, Any}' href='#TidierDB.@left_join-NTuple{4, Any}'>#</a>
 <strong><code>TidierDB.@left_join</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@left_join</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">join_table</span><span class="p">,</span><span class="w"> </span><span class="n">new_table_col</span><span class="p">,</span><span class="w"> </span><span class="n">orignal_table_col</span><span class="p">)</span>
@@ -1294,7 +1294,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ AH       aa            3       0.8  missing  missing   missing </span>
 <span class="go">  10 │ AJ       aa            5       1.0  missing  missing   missing </span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/joins_sq.jl#L12-L62' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/joins_sq.jl#L12-L62' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@mutate-Tuple{Any, Vararg{Any}}' href='#TidierDB.@mutate-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@mutate</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@mutate</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -1334,7 +1334,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ AI       bb           16       0.9      0.81</span>
 <span class="go">  10 │ AJ       aa           20       1.0      1.0</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L183-L221' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L183-L221' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@rename-Tuple{Any, Vararg{Any}}' href='#TidierDB.@rename-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@rename</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@rename</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">renamings</span><span class="o">...</span><span class="p">)</span>
@@ -1371,7 +1371,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ AI       bb            4       0.9</span>
 <span class="go">  10 │ AJ       aa            5       1.0</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L488-L527' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L488-L527' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@right_join-NTuple{4, Any}' href='#TidierDB.@right_join-NTuple{4, Any}'>#</a>
 <strong><code>TidierDB.@right_join</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@right_join</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">join_table</span><span class="p">,</span><span class="w"> </span><span class="n">new_table_col</span><span class="p">,</span><span class="w"> </span><span class="n">orignal_table_col</span><span class="p">)</span>
@@ -1416,7 +1416,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   6 │ missing  missing  missing  missing    AK       Y             68</span>
 <span class="go">   7 │ missing  missing  missing  missing    AM       X             74</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/joins_sq.jl#L59-L106' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/joins_sq.jl#L59-L106' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@select-Tuple{Any, Vararg{Any}}' href='#TidierDB.@select-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@select</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@select</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="p">)</span>
@@ -1475,7 +1475,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │      4       0.9</span>
 <span class="go">  10 │      5       1.0</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L1-L60' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L1-L60' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@semi_join-NTuple{4, Any}' href='#TidierDB.@semi_join-NTuple{4, Any}'>#</a>
 <strong><code>TidierDB.@semi_join</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@semi_join</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">join_table</span><span class="p">,</span><span class="w"> </span><span class="n">new_table_col</span><span class="p">,</span><span class="w"> </span><span class="n">orignal_table_col</span><span class="p">)</span>
@@ -1518,7 +1518,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ AG       bb            2       0.7</span>
 <span class="go">   5 │ AI       bb            4       0.9</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/joins_sq.jl#L203-L248' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/joins_sq.jl#L203-L248' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@slice_max' href='#TidierDB.@slice_max'>#</a>
 <strong><code>TidierDB.@slice_max</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_max</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">column</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span>
@@ -1557,7 +1557,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ AE       bb            5       0.5         1</span>
 <span class="go">   2 │ AJ       aa            5       1.0         1</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/slices_sq.jl#L89-L127' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/slices_sq.jl#L89-L127' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@slice_min' href='#TidierDB.@slice_min'>#</a>
 <strong><code>TidierDB.@slice_min</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_min</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">column</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span>
@@ -1596,7 +1596,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ AA       bb            1       0.1         1</span>
 <span class="go">   2 │ AF       aa            1       0.6         1</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/slices_sq.jl#L1-L39' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/slices_sq.jl#L1-L39' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@slice_sample' href='#TidierDB.@slice_sample'>#</a>
 <strong><code>TidierDB.@slice_sample</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_sample</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="p">)</span>
@@ -1628,7 +1628,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">       </span><span class="nd">@collect</span>
 <span class="w">       </span><span class="k">end</span><span class="p">;</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/slices_sq.jl#L179-L209' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/slices_sq.jl#L179-L209' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@summarise-Tuple{Any, Vararg{Any}}' href='#TidierDB.@summarise-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@summarise</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="w">   </span><span class="nd">@summarise</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -1675,7 +1675,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ aa            3.0       5</span>
 <span class="go">   2 │ bb            2.5       5</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L441-L486' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L441-L486' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@summarize-Tuple{Any, Vararg{Any}}' href='#TidierDB.@summarize-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@summarize</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="w">   </span><span class="nd">@summarize</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -1722,7 +1722,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ aa            3.0       5</span>
 <span class="go">   2 │ bb            2.5       5</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L387-L432' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L387-L432' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@window_frame-Tuple{Any, Int64, Int64}' href='#TidierDB.@window_frame-Tuple{Any, Int64, Int64}'>#</a>
 <strong><code>TidierDB.@window_frame</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@window_frame</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">frame_start</span><span class="o">::</span><span class="kt">Int</span><span class="p">,</span><span class="w"> </span><span class="n">frame_end</span><span class="o">::</span><span class="kt">Int</span><span class="p">)</span>
@@ -1744,7 +1744,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">copy_to</span><span class="p">(</span><span class="n">db</span><span class="p">,</span><span class="w"> </span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;df_mem&quot;</span><span class="p">);</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L597-L618' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L597-L618' class='documenter-source'>source</a><br></p>
 <p><a id='TidierDB.@window_order-Tuple{Any, Vararg{Any}}' href='#TidierDB.@window_order-Tuple{Any, Vararg{Any}}'>#</a>
 <strong><code>TidierDB.@window_order</code></strong> &mdash; <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="w">   </span><span class="nd">@window_order</span><span class="p">(</span><span class="n">sql_query</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="o">...</span><span class="p">)</span>
@@ -1765,7 +1765,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">copy_to</span><span class="p">(</span><span class="n">db</span><span class="p">,</span><span class="w"> </span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;df_mem&quot;</span><span class="p">);</span>
 </code></pre></div>
-<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/67ec49050bfcb7141ae27957d713f4d043013cc4/src/TBD_macros.jl#L555-L574' class='documenter-source'>source</a><br></p>
+<p><a target='_blank' href='https://github.com/TidierOrg/TidierDB.jl/blob/4c141ed48a966d2e0ce12297710915d88ae1fed4/src/TBD_macros.jl#L555-L574' class='documenter-source'>source</a><br></p>
 <p><a id='Reference-Internal-functions'></a></p>
 <p><a id='Reference-Internal-functions-1'></a></p>
 <h2 id="reference-internal-functions">Reference - Internal functions<a class="headerlink" href="#reference-internal-functions" title="Permanent link">¤</a></h2>
diff --git a/latest/search/search_index.json b/latest/search/search_index.json
index 839c22b..91a9ec7 100644
--- a/latest/search/search_index.json
+++ b/latest/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#what-is-tidierdbjl","title":"What is TidierDB.jl?","text":"<p>TiderDB.jl is a 100% Julia implementation of the dbplyr R package, and similar to Python's ibis package.</p> <p>The main goal of TidierDB.jl is to bring the syntax of Tidier.jl to multiple SQL backends, making it possible to analyze data directly on databases without needing to copy the entire database into memory.</p> <p></p> <p></p>"},{"location":"#currently-supported-backends-include","title":"Currently supported backends include:","text":"<ul> <li>DuckDB (the default) <code>duckdb()</code></li> <li>ClickHouse <code>clickhouse()</code></li> <li>SQLite <code>sqlite()</code></li> <li>MySQL and MariaDB <code>mysql()</code></li> <li>MSSQL <code>mssql()</code></li> <li>Postgres <code>postgres()</code></li> <li>Athena <code>athena()</code></li> <li>Snowflake <code>snowflake()</code></li> <li>Google Big Query <code>gbq()</code></li> <li>Oracle <code>oracle()</code></li> <li>Databricks <code>databricks()</code></li> </ul> <p>Change the backend using <code>set_sql_mode()</code> - for example  - <code>set_sql_mode(databricks())</code></p> <p></p> <p></p>"},{"location":"#installation","title":"Installation","text":"<p>For the stable version:</p> <pre><code>] add TidierDB\n</code></pre> <p>TidierDB.jl currently supports the following top-level macros:</p> <ul> <li><code>@arrange</code></li> <li><code>@group_by</code></li> <li><code>@filter</code></li> <li><code>@select</code></li> <li><code>@mutate</code>, which supports <code>across()</code></li> <li><code>@summarize</code> and <code>@summarise</code>, which supports <code>across()</code></li> <li><code>@distinct</code></li> <li><code>@left_join</code>, <code>@right_join</code>, <code>@inner_join</code>, <code>@anti_join</code>, <code>@full_join</code>, and <code>@semi_join</code> (slight syntax differences from 
TidierData.jl)</li> <li><code>@count</code></li> <li><code>@slice_min</code>, <code>@slice_max</code>, <code>@slice_sample</code></li> <li><code>@window_order</code> and <code>window_frame</code></li> <li><code>@show_query</code></li> <li><code>@collect</code></li> </ul> <p>Supported helper functions for most backends include:</p> <ul> <li><code>across()</code></li> <li><code>desc()</code></li> <li><code>if_else()</code> and <code>case_when()</code></li> <li><code>n()</code></li> <li><code>starts_with()</code>, <code>ends_with()</code>, and <code>contains()</code></li> <li><code>as_float()</code>, <code>as_integer()</code>, and <code>as_string()</code></li> <li><code>is_missing()</code></li> <li><code>missing_if()</code> and <code>replace_missing()</code></li> </ul> <p>From TidierStrings.jl:</p> <ul> <li><code>str_detect</code>, <code>str_replace</code>, <code>str_replace_all</code>, <code>str_remove_all</code>, <code>str_remove</code></li> </ul> <p>From TidierDates.jl:</p> <ul> <li><code>year</code>, <code>month</code>, <code>day</code>, <code>hour</code>, <code>min</code>, <code>second</code>, <code>floor_date</code>, <code>difftime</code></li> </ul> <p>Supported aggregate functions (as supported by the backend) with more to come</p> <ul> <li><code>mean</code>, <code>minimium</code>, <code>maximum</code>, <code>std</code>, <code>sum</code>, <code>cumsum</code>, <code>cor</code>, <code>cov</code>, <code>var</code></li> <li><code>@summarize</code> supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work</li> </ul> <p>When using the DuckDB backend, if <code>db_table</code> recieves a file path ( <code>.parquet</code>, <code>.json</code>, <code>.csv</code>, <code>iceberg</code> or <code>delta</code>), it does not copy it into memory. This allows for queries on files too big for memory. 
<code>db_table</code> also supports S3 bucket locations via DuckDB.</p> <p></p> <p></p>"},{"location":"#what-is-the-recommended-way-to-use-tidierdb","title":"What is the recommended way to use TidierDB?","text":"<p>Typically, you will want to use TidierDB alongside TidierData because there are certain functionality (such as pivoting) which are only supported in TidierData and can only be performed on data frames.</p> <p>Our recommended path for using TidierDB is to import the package so that there are no namespace conflicts with TidierData. Once TidierDB is integrated with Tidier, then Tidier will automatically load the packages in this fashion.</p> <p>First, let's develop and execute a query using TidierDB. Notice that all top-level macros and functions originating from TidierDB start with a <code>DB</code> prefix. Any functions defined within macros do not need to be prefixed within <code>DB</code> because they are actually pseudofunctions that are in actuality converted into SQL code.</p> <p>Even though the code reads similarly to TidierData, note that no computational work actually occurs until you run <code>DB.@collect()</code>, which runs the SQL query and instantiates the result as a DataFrame.</p> <pre><code>using TidierData\nimport TidierDB as DB\n\ndb = DB.connect(DB.duckdb());\npath_or_name = \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\"\n\n@chain DB.db_table(db, path_or_name) begin\n    DB.@filter(!starts_with(model, \"M\"))\n    DB.@group_by(cyl)\n    DB.@summarize(mpg = mean(mpg))\n    DB.@mutate(mpg_squared = mpg^2, \n               mpg_rounded = round(mpg), \n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))            \n    DB.@filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    
DB.@arrange(desc(mpg_rounded))\n    DB.@collect\nend\n</code></pre> <pre><code>2\u00d75 DataFrame\n Row \u2502 cyl     mpg       mpg_squared  mpg_rounded  mpg_efficiency \n     \u2502 Int64?  Float64?  Float64?     Float64?     String?        \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      4   27.3444      747.719         27.0  efficient\n   2 \u2502      6   19.7333      389.404         20.0  moderate\n</code></pre> <p></p> <p></p>"},{"location":"#what-if-we-wanted-to-pivot-the-result","title":"What if we wanted to pivot the result?","text":"<p>We cannot do this using TidierDB. However, we can call <code>@pivot_longer()</code> from TidierData after the result of the query has been instantiated as a DataFrame, like this: </p> <pre><code>@chain DB.db_table(db, path_or_name) begin\n    DB.@filter(!starts_with(model, \"M\"))\n    DB.@group_by(cyl)\n    DB.@summarize(mpg = mean(mpg))\n    DB.@mutate(mpg_squared = mpg^2, \n               mpg_rounded = round(mpg), \n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))            \n    DB.@filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    DB.@arrange(desc(mpg_rounded))\n    DB.@collect\n    @pivot_longer(everything(), names_to = \"variable\", values_to = \"value\")\nend\n</code></pre> <pre><code>10\u00d72 DataFrame\n Row \u2502 variable        value     \n     \u2502 String          Any       
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 cyl             4\n   2 \u2502 cyl             6\n   3 \u2502 mpg             27.3444\n   4 \u2502 mpg             19.7333\n   5 \u2502 mpg_squared     747.719\n   6 \u2502 mpg_squared     389.404\n   7 \u2502 mpg_rounded     27.0\n   8 \u2502 mpg_rounded     20.0\n   9 \u2502 mpg_efficiency  efficient\n  10 \u2502 mpg_efficiency  moderate\n</code></pre> <p></p> <p></p>"},{"location":"#what-sql-query-does-tidierdb-generate-for-a-given-piece-of-julia-code","title":"What SQL query does TidierDB generate for a given piece of Julia code?","text":"<p>We can replace <code>DB.collect()</code> with <code>DB.@show_query</code> to reveal the underlying SQL query being generated by TidierDB. To handle complex queries, TidierDB makes heavy use of Common Table Expressions (CTE), which are a useful tool to organize long queries.</p> <pre><code>@chain DB.db_table(db, path_or_name) begin\n    DB.@filter(!starts_with(model, \"M\"))\n    DB.@group_by(cyl)\n    DB.@summarize(mpg = mean(mpg))\n    DB.@mutate(mpg_squared = mpg^2, \n               mpg_rounded = round(mpg), \n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))            \n    DB.@filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    DB.@arrange(desc(mpg_rounded))\n    DB.@show_query\nend\n</code></pre> <pre><code>WITH cte_1 AS (\nSELECT *\n        FROM mtcars\n        WHERE NOT (starts_with(model, 'M'))),\ncte_2 AS (\nSELECT cyl, AVG(mpg) AS mpg\n        FROM cte_1\n        GROUP BY cyl),\ncte_3 AS (\nSELECT  cyl, mpg, POWER(mpg, 2) AS mpg_squared, ROUND(mpg) AS mpg_rounded, CASE WHEN mpg &gt;= POWER(cyl, 2) THEN 'efficient' WHEN 
mpg &lt; 15.2 THEN 'inefficient' ELSE 'moderate' END AS mpg_efficiency\n        FROM cte_2 ),\ncte_4 AS (\nSELECT *\n        FROM cte_3\n        WHERE mpg_efficiency in ('moderate', 'efficient'))  \nSELECT *\n        FROM cte_4  \n        ORDER BY mpg_rounded DESC\n</code></pre> <p></p> <p></p>"},{"location":"#tidierdb-is-already-quite-fully-featured-supporting-advanced-tidierdata-functions-like-across-for-multi-column-selection","title":"TidierDB is already quite fully-featured, supporting advanced TidierData functions like <code>across()</code> for multi-column selection.","text":"<pre><code>@chain DB.db_table(db, path_or_name) begin\n    DB.@group_by(cyl)\n    DB.@summarize(across((starts_with(\"a\"), ends_with(\"s\")), (mean, sum)))\n    DB.@collect\nend\n</code></pre> <pre><code>3\u00d75 DataFrame\n Row \u2502 cyl     mean_am   mean_vs   sum_am   sum_vs  \n     \u2502 Int64?  Float64?  Float64?  Int128?  Int128? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      4  0.727273  0.909091        8       10\n   2 \u2502      6  0.428571  0.571429        3        4\n   3 \u2502      8  0.142857  0.0             2        0\n</code></pre> <p>Bang bang <code>!!</code> interpolation for columns and values is also supported.</p> <p>There are a few subtle but important differences from Tidier.jl outlined here.</p> <p></p> <p></p>"},{"location":"#missing-a-function-or-backend","title":"Missing a function or backend?","text":"<p>You can use any existing SQL function within <code>@mutate</code> with the correct SQL syntax and it should just work.</p> <p>But if you run into problems please open an issue, and we will be happy to take a 
look!</p>"},{"location":"reference/","title":"Reference","text":""},{"location":"reference/#index","title":"Index","text":"<ul> <li><code>TidierDB.connect</code></li> <li><code>TidierDB.copy_to</code></li> <li><code>TidierDB.db_table</code></li> <li><code>TidierDB.@anti_join</code></li> <li><code>TidierDB.@arrange</code></li> <li><code>TidierDB.@collect</code></li> <li><code>TidierDB.@count</code></li> <li><code>TidierDB.@distinct</code></li> <li><code>TidierDB.@filter</code></li> <li><code>TidierDB.@full_join</code></li> <li><code>TidierDB.@group_by</code></li> <li><code>TidierDB.@head</code></li> <li><code>TidierDB.@inner_join</code></li> <li><code>TidierDB.@interpolate</code></li> <li><code>TidierDB.@left_join</code></li> <li><code>TidierDB.@mutate</code></li> <li><code>TidierDB.@rename</code></li> <li><code>TidierDB.@right_join</code></li> <li><code>TidierDB.@select</code></li> <li><code>TidierDB.@semi_join</code></li> <li><code>TidierDB.@slice_max</code></li> <li><code>TidierDB.@slice_min</code></li> <li><code>TidierDB.@slice_sample</code></li> <li><code>TidierDB.@summarise</code></li> <li><code>TidierDB.@summarize</code></li> <li><code>TidierDB.@window_frame</code></li> <li><code>TidierDB.@window_order</code></li> </ul>"},{"location":"reference/#reference-exported-functions","title":"Reference - Exported functions","text":"<p># <code>TidierDB.connect</code> \u2014 Method.</p> <pre><code>connect(backend; kwargs...)\n</code></pre> <p>This function establishes a database connection based on the specified backend and connection parameters and sets the SQL mode</p> <p>Arguments</p> <ul> <li> <p><code>backend</code>: type specifying the database backend to connect to. 
Supported backends are:</p> <ul> <li><code>duckdb()</code>, <code>sqlite()</code>(SQLite), <code>mssql()</code>, <code>mysql()</code>(for MariaDB and MySQL), <code>clickhouse()</code>, <code>postgres()</code></li> <li> <p><code>kwargs</code>: Keyword arguments specifying the connection parameters for the selected backend. The required parameters vary depending on the backend:</p> </li> <li> <p>MySQL:</p> <ul> <li><code>host</code>: The host name or IP address of the MySQL server. Default is \"localhost\".</li> <li><code>user</code>: The username for authentication. Default is an empty string.</li> <li><code>password</code>: The password for authentication.</li> <li><code>db</code>: The name of the database to connect to (optional).</li> <li><code>port</code>: The port number of the MySQL server (optional).</li> </ul> </li> </ul> </li> </ul> <p>Returns</p> <ul> <li>A database connection object based on the selected backend.</li> </ul> <p>Examples</p> <pre><code># Connect to MySQL\n# conn = connect(mysql(); host=\"localhost\", user=\"root\", password=\"password\", db=\"mydb\")\n# Connect to PostgreSQL using LibPQ\n# conn = connect(postgres(); host=\"localhost\", dbname=\"mydb\", user=\"postgres\", password=\"password\")\n# Connect to ClickHouse\n# conn = connect(clickhouse(); host=\"localhost\", port=9000, database=\"mydb\", user=\"default\", password=\"\")\n# Connect to SQLite\n# conn = connect(sqlite())\n# Connect to Google Big Query\n# conn = connect(gbq(), \"json_user_key_path\", \"project_id\")\n# Connect to Snowflake\n# conn = connect(snowflake(), \"ac_id\", \"token\", \"Database_name\", \"Schema_name\", \"warehouse_name\")\n# Connect to DuckDB\n# connect to Google Cloud via DuckDB\n# google_db = connect(duckdb(), :gbq, access_key=\"string\", secret_key=\"string\")\n# Connect to AWS via DuckDB\n# aws_db = connect2(duckdb(), :aws, aws_access_key_id=get(ENV, \"AWS_ACCESS_KEY_ID\", \"access_key\"), aws_secret_access_key=get(ENV, \"AWS_SECRET_ACCESS_KEY\", 
\"secret_access key\"), aws_region=get(ENV, \"AWS_DEFAULT_REGION\", \"us-east-1\"))\n# Connect to MotherDuck\n# connect(duckdb(), \"token\") for first connection, vs connect(duckdb(), \"md:\") for reconnection\njulia&gt; db = connect(duckdb())\nDuckDB.Connection(\":memory:\")\n</code></pre> <p>source</p> <p># <code>TidierDB.copy_to</code> \u2014 Method.</p> <pre><code>   copy_to(conn, df_or_path, \"name\")\n</code></pre> <p>Allows user to copy a df to the database connection. Currently supports DuckDB, SQLite, MySql</p> <p>Arguments</p> <p>-<code>conn</code>: the database connection -<code>df</code>: dataframe to be copied or path to serve as source. With DuckDB, path supports .csv, .json, .parquet to be used without copying intermediary df. -<code>name</code>: name as string for the database to be used</p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"test\");\n</code></pre> <p>source</p> <p># <code>TidierDB.db_table</code> \u2014 Function.</p> <pre><code>db_table(database, table_name, athena_params, delta = false, iceberg = false)\n</code></pre> <p><code>db_table</code> starts the underlying SQL query struct, adding the metadata and table. If paths are passed directly to db_table instead of a  name it will not copy it to memory, but rather ready directly from the file.</p> <p>Arguments</p> <ul> <li><code>database</code>: The Database or connection object</li> <li> <p><code>table_name</code>: tablename as a string (name, local path, or URL).     
- CSV/TSV       - Parquet     - Json      - Iceberg     - Delta     - S3 tables from AWS or Google Cloud </p> <ul> <li>DuckDB and ClickHouse support vectors of paths and URLs.</li> <li>DuckDB and ClickHouse also support use of <code>*</code> wildcards to read all files of a type in a location such as:</li> <li><code>db_table(db, \"Path/to/testing_files/*.parquet\")</code></li> <li><code>delta</code>: must be true to read delta files</li> <li><code>iceberg</code>: must be true to read iceberg finalize_ctes</li> </ul> </li> </ul> <p>Example</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; db_table(db, \"df_mem\")\nTidierDB.SQLQuery(\"\", \"df_mem\", \"\", \"\", \"\", \"\", \"\", \"\", false, false, 4\u00d74 DataFrame\n Row \u2502 name     type     current_selxn  table_name \n     \u2502 String?  String?  
Int64          String     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 id       VARCHAR              1  df_mem\n   2 \u2502 groups   VARCHAR              1  df_mem\n   3 \u2502 value    BIGINT               1  df_mem\n   4 \u2502 percent  DOUBLE               1  df_mem, false, DuckDB.Connection(\":memory:\"), TidierDB.CTE[], 0, nothing)\n</code></pre> <p>source</p> <p># <code>TidierDB.@anti_join</code> \u2014 Macro.</p> <pre><code>@anti_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform an anti join between two SQL queries based on a specified condition.  This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferrable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n        @anti_join(df_join, id2, id)\n        @collect\n       end\n5\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AB       aa            2       0.2\n   2 \u2502 AD       aa            4       0.4\n   3 \u2502 AF       aa            1       0.6\n   4 \u2502 AH       aa            3       0.8\n   5 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@arrange</code> \u2014 Macro.</p> <pre><code>@arrange(sql_query, columns...)\n</code></pre> <p>Order SQL table rows based on specified column(s).</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>columns</code>: Columns to order the rows by. Can include multiple columns for nested sorting. Wrap column name with <code>desc()</code> for descending order.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @arrange(value, desc(percent))\n         @collect\n       end\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AF       aa            1       0.6\n   2 \u2502 AA       bb            1       0.1\n   3 \u2502 AG       bb            2       0.7\n   4 \u2502 AB       aa            2       0.2\n   5 \u2502 AH       aa            3       0.8\n   6 \u2502 AC       bb            3       0.3\n   7 \u2502 AI       bb            4       0.9\n   8 \u2502 AD       aa            4       0.4\n   9 \u2502 AJ       aa            5       1.0\n  10 \u2502 AE       bb            5       0.5\n</code></pre> <p>source</p> <p># <code>TidierDB.@collect</code> \u2014 Macro.</p> <pre><code>@collect(sql_query, stream = false)\n</code></pre> <p><code>db_table</code> starts the underlying SQL query struct, adding the metadata and table. </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>stream</code>: optional streaming for query/execution of results when using duck db. Defaults to false</li> </ul> <p>Example</p> <pre><code>julia&gt; db = connect(duckdb());\n\njulia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @collect db_table(db, \"df_mem\")\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AB       aa            2       0.2\n   3 \u2502 AC       bb            3       0.3\n   4 \u2502 AD       aa            4       0.4\n   5 \u2502 AE       bb            5       0.5\n   6 \u2502 AF       aa            1       0.6\n   7 \u2502 AG       bb            2       0.7\n   8 \u2502 AH       aa            3       0.8\n   9 \u2502 AI       bb            4       0.9\n  10 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@count</code> \u2014 Macro.</p> <pre><code>@count(sql_query, columns...)\n</code></pre> <p>Count the number of rows grouped by specified column(s).</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>columns</code>: Columns to group by before counting. If no columns are specified, counts all rows in the query.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @count(groups)\n         @arrange(groups)\n         @collect\n       end\n2\u00d72 DataFrame\n Row \u2502 groups   count  \n     \u2502 String?  Int64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            5\n   2 \u2502 bb            5\n</code></pre> <p>source</p> <p># <code>TidierDB.@distinct</code> \u2014 Macro.</p> <pre><code>@distinct(sql_query, columns...)\n</code></pre> <p>Select distinct rows based on specified column(s). Distinct works differently in TidierData vs SQL and therefore TidierDB. Distinct will also select only the only columns it is given (or all if given none)</p> <p>Arguments</p> <p><code>sql_query</code>: The SQL query to operate on. <code>columns</code>: Columns to determine uniqueness. If no columns are specified, all columns are used to identify distinct rows.</p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @distinct(value)\n         @arrange(value)\n         @collect\n       end\n5\u00d71 DataFrame\n Row \u2502 value  \n     \u2502 Int64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      1\n   2 \u2502      2\n   3 \u2502      3\n   4 \u2502      4\n   5 \u2502      5\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @distinct\n         @arrange(id)\n         @collect\n       end\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AB       aa            2       0.2\n   3 \u2502 AC       bb            3       0.3\n   4 \u2502 AD       aa            4       0.4\n   5 \u2502 AE       bb            5       0.5\n   6 \u2502 AF       aa            1       0.6\n   7 \u2502 AG       bb            2       0.7\n   8 \u2502 AH       aa            3       0.8\n   9 \u2502 AI       bb            4       0.9\n  10 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@filter</code> \u2014 Macro.</p> <pre><code>@filter(sql_query, conditions...)\n</code></pre> <p>Filter rows in a SQL table based on specified conditions.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to filter rows from.</li> <li> <p><code>conditions</code>: Expressions specifying the conditions that rows must satisfy to be included in the output.                   Rows for which the expression evaluates to <code>true</code> will be included in the result.                   Multiple conditions can be combined using logical operators (<code>&amp;&amp;</code>, <code>||</code>). It will automatically                   detect whether the conditions belong in WHERE vs HAVING. 
</p> <pre><code>             Temporarily, it is best to use begin and end when filtering multiple conditions. (ex 2 below)\n</code></pre> </li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @filter(percent &gt; .5)\n         @collect\n       end\n5\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AF       aa            1       0.6\n   2 \u2502 AG       bb            2       0.7\n   3 \u2502 AH       aa            3       0.8\n   4 \u2502 AI       bb            4       0.9\n   5 \u2502 AJ       aa            5       1.0\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(mean = mean(percent))\n         @filter begin \n           groups == \"bb\" || # logical operators can still be used like this\n           mean &gt; .5\n         end\n         @arrange(groups)\n         @collect\n       end\n2\u00d72 DataFrame\n Row \u2502 groups   mean     \n     \u2502 String?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            0.6\n   2 \u2502 bb            0.5\n</code></pre> <p>source</p> <p># <code>TidierDB.@full_join</code> \u2014 Macro.</p> <pre><code>@inner_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform an full join between two SQL queries based on a specified condition.  This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferrable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @full_join(df_join, id2, id)\n         @collect\n       end\n12\u00d77 DataFrame\n Row \u2502 id       groups   value    percent    id2      category  score   \n     \u2502 String?  String?  Int64?   Float64?   String?  String?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb             1        0.1  AA       X              88\n   2 \u2502 AC       bb             3        0.3  AC       Y              92\n   3 \u2502 AE       bb             5        0.5  AE       X              77\n   4 \u2502 AG       bb             2        0.7  AG       Y              83\n   5 \u2502 AI       bb             4        0.9  AI       X              95\n   6 \u2502 AB       aa             2        0.2  missing  missing   missing \n   7 \u2502 AD       aa             4        0.4  missing  missing   missing \n   8 \u2502 AF       aa             1        0.6  missing  missing   missing \n   9 \u2502 AH       aa             3        0.8  missing  missing   missing \n  10 \u2502 AJ       aa             5        1.0  missing  missing   missing \n  11 \u2502 missing  missing  missing  missing    AK       Y              68\n  12 \u2502 missing  missing  missing  missing    AM       X              74\n</code></pre> <p>source</p> <p># <code>TidierDB.@group_by</code> \u2014 Macro.</p> <pre><code>@group_by(sql_query, columns...)\n</code></pre> <p>Group SQL table rows by specified column(s). If grouping is performed as a terminal operation without a subsequent mutatation or summarization (as in the example below), then the resulting data frame will be ungrouped when <code>@collect</code> is applied.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions specifying the columns to group by. 
Columns can be specified by name.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @arrange(groups)\n         @collect\n       end\n2\u00d71 DataFrame\n Row \u2502 groups  \n     \u2502 String? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa\n   2 \u2502 bb\n</code></pre> <p>source</p> <p># <code>TidierDB.@head</code> \u2014 Macro.</p> <pre><code>@head(sql_query, value)\n</code></pre> <p>Limit SQL table number of rows returned based on specified value.  <code>LIMIT</code> in SQL</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>value</code>: Number to limit how many rows are returned.</li> </ul> <p>Examples</p> <pre><code>julia&gt; db = connect(duckdb());\n\njulia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; copy_to(db, df, \"df_mem\");                     \n\njulia&gt; @chain db_table(db, :df_mem) begin\n        @head(1) ## supports expressions ie `3-2` would return the same df below\n        @collect\n       end\n1\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n</code></pre> <p>source</p> <p># <code>TidierDB.@inner_join</code> \u2014 Macro.</p> <pre><code>@inner_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform an inner join between two SQL queries based on a specified condition.  The syntax here is slightly different from TidierData.jl: because SQL does not drop the joining column, it is preferable, for metadata storage, for the two join columns to have different names.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @inner_join(df_join, id2, id)\n         @collect\n       end\n5\u00d77 DataFrame\n Row \u2502 id       groups   value   percent   id2      category  score  \n     \u2502 String?  String?  Int64?  Float64?  String?  String?   Int64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1  AA       X             88\n   2 \u2502 AC       bb            3       0.3  AC       Y             92\n   3 \u2502 AE       bb            5       0.5  AE       X             77\n   4 \u2502 AG       bb            2       0.7  AG       Y             83\n   5 \u2502 AI       bb            4       0.9  AI       X             95\n</code></pre> <p>source</p> <p># <code>TidierDB.@interpolate</code> \u2014 Macro.</p> <pre><code>@interpolate(args...)\n</code></pre> <p>Interpolate parameters into expressions for database queries.</p> <p>Arguments</p> <ul> <li> <p><code>args...</code>: A variable number of tuples. 
Each tuple should contain:</p> <ul> <li><code>name</code>: The name of the parameter to interpolate.</li> <li><code>value</code>: (Any): The value/vector to interpolate for the corresponding parameter name.</li> </ul> </li> </ul> <p>Example</p> <pre><code>julia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; col_names = [:id, :value, :percent];\n\njulia&gt; cond1 = .2;\n\njulia&gt; cond2 = 5;\n\njulia&gt; @interpolate((condition1, cond1), (columns, col_names), (condition2, cond2));\n\njulia&gt; @chain db_table(db, \"df_mem\") begin \n          @select(!!columns)\n          @filter begin \n              percent &lt; !!condition1\n              value &lt; !!condition2\n          end\n          @collect\n          end\n1\u00d73 DataFrame\n Row \u2502 id       value   percent  \n     \u2502 String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA            1       0.1\n</code></pre> <p>source</p> <p># <code>TidierDB.@left_join</code> \u2014 Macro.</p> <pre><code>@left_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform a left join between two SQL queries based on a specified condition.  
The syntax here is slightly different from TidierData.jl: because SQL does not drop the joining column, it is preferable, for metadata storage, for the two join columns to have different names.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @left_join(df_join, id2, id)\n         @collect\n       end\n10\u00d77 DataFrame\n Row \u2502 id       groups   value   percent   id2      category  score   \n     \u2502 String?  String?  Int64?  Float64?  String?  String?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1  AA       X              88\n   2 \u2502 AC       bb            3       0.3  AC       Y              92\n   3 \u2502 AE       bb            5       0.5  AE       X              77\n   4 \u2502 AG       bb            2       0.7  AG       Y              83\n   5 \u2502 AI       bb            4       0.9  AI       X              95\n   6 \u2502 AB       aa            2       0.2  missing  missing   missing \n   7 \u2502 AD       aa            4       0.4  missing  missing   missing \n   8 \u2502 AF       aa            1       0.6  missing  missing   missing \n   9 \u2502 AH       aa            3       0.8  missing  missing   missing \n  10 \u2502 AJ       aa            5       1.0  missing  missing   missing \n</code></pre> <p>source</p> <p># <code>TidierDB.@mutate</code> \u2014 Macro.</p> <pre><code>@mutate(sql_query, exprs...)\n</code></pre> <p>Mutate SQL table rows by adding new columns or modifying existing ones.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions for mutating the table. New columns can be added or existing columns modified using column_name = expression syntax, where expression can involve existing columns.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @mutate(value = value * 4, new_col = percent^2)\n         @collect\n       end\n10\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   new_col  \n     \u2502 String?  String?  Int64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            4       0.1      0.01\n   2 \u2502 AB       aa            8       0.2      0.04\n   3 \u2502 AC       bb           12       0.3      0.09\n   4 \u2502 AD       aa           16       0.4      0.16\n   5 \u2502 AE       bb           20       0.5      0.25\n   6 \u2502 AF       aa            4       0.6      0.36\n   7 \u2502 AG       bb            8       0.7      0.49\n   8 \u2502 AH       aa           12       0.8      0.64\n   9 \u2502 AI       bb           16       0.9      0.81\n  10 \u2502 AJ       aa           20       1.0      1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@rename</code> \u2014 Macro.</p> <pre><code>@rename(sql_query, renamings...)\n</code></pre> <p>Rename one or more columns in a SQL query.</p> <p>Arguments</p> <p>-<code>sql_query</code>: The SQL query to operate on. -<code>renamings</code>: One or more pairs of old and new column names, specified as new name = old name </p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n       @rename(new_name = percent)\n       @collect\n       end\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   new_name \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AB       aa            2       0.2\n   3 \u2502 AC       bb            3       0.3\n   4 \u2502 AD       aa            4       0.4\n   5 \u2502 AE       bb            5       0.5\n   6 \u2502 AF       aa            1       0.6\n   7 \u2502 AG       bb            2       0.7\n   8 \u2502 AH       aa            3       0.8\n   9 \u2502 AI       bb            4       0.9\n  10 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@right_join</code> \u2014 Macro.</p> <pre><code>@right_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform a right join between two SQL queries based on a specified condition.  
The syntax here is slightly different from TidierData.jl: because SQL does not drop the joining column, it is preferable, for metadata storage, for the two join columns to have different names.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @right_join(df_join, id2, id)\n         @collect\n       end\n7\u00d77 DataFrame\n Row \u2502 id       groups   value    percent    id2      category  score  \n     \u2502 String?  String?  Int64?   Float64?   String?  String?   Int64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb             1        0.1  AA       X             88\n   2 \u2502 AC       bb             3        0.3  AC       Y             92\n   3 \u2502 AE       bb             5        0.5  AE       X             77\n   4 \u2502 AG       bb             2        0.7  AG       Y             83\n   5 \u2502 AI       bb             4        0.9  AI       X             95\n   6 \u2502 missing  missing  missing  missing    AK       Y             68\n   7 \u2502 missing  missing  missing  missing    AM       X             74\n</code></pre> <p>source</p> <p># <code>TidierDB.@select</code> \u2014 Macro.</p> <pre><code>@select(sql_query, columns)\n</code></pre> <p>Select specified columns from a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to select columns from.</li> <li><code>columns</code>: Expressions specifying the columns to select. Columns can be specified by name,                and new columns can be created with expressions using existing column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @select(groups:percent)\n         @collect\n       end\n10\u00d73 DataFrame\n Row \u2502 groups   value   percent  \n     \u2502 String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 bb            1       0.1\n   2 \u2502 aa            2       0.2\n   3 \u2502 bb            3       0.3\n   4 \u2502 aa            4       0.4\n   5 \u2502 bb            5       0.5\n   6 \u2502 aa            1       0.6\n   7 \u2502 bb            2       0.7\n   8 \u2502 aa            3       0.8\n   9 \u2502 bb            4       0.9\n  10 \u2502 aa            5       1.0\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @select(contains(\"e\"))\n         @collect\n       end\n10\u00d72 DataFrame\n Row \u2502 value   percent  \n     \u2502 Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      1       0.1\n   2 \u2502      2       0.2\n   3 \u2502      3       0.3\n   4 \u2502      4       0.4\n   5 \u2502      5       0.5\n   6 \u2502      1       0.6\n   7 \u2502      2       0.7\n   8 \u2502      3       0.8\n   9 \u2502      4       0.9\n  10 \u2502      5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@semi_join</code> \u2014 Macro.</p> <pre><code>@semi_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform an semi join between two SQL queries based on a specified condition.  
The syntax here is slightly different from TidierData.jl: because SQL does not drop the joining column, it is preferable, for metadata storage, for the two join columns to have different names.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @semi_join(df_join, id2, id)\n         @collect\n       end\n5\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AC       bb            3       0.3\n   3 \u2502 AE       bb            5       0.5\n   4 \u2502 AG       bb            2       0.7\n   5 \u2502 AI       bb            4       0.9\n</code></pre> <p>source</p> <p># <code>TidierDB.@slice_max</code> \u2014 Macro.</p> <pre><code>@slice_max(sql_query, column, n = 1)\n</code></pre> <p>Select rows with the largest values in specified column. This will always return ties. </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>column</code>: Column to identify the largest values.</li> <li><code>n</code>: The number of rows to select with the largest values for each specified column. Default is 1, which selects the row with the largest value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @slice_max(value, n = 2)\n         @collect\n       end;\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @slice_max(value)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   rank_col \n     \u2502 String?  String?  Int64?  Float64?  Int64?   
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AE       bb            5       0.5         1\n   2 \u2502 AJ       aa            5       1.0         1\n</code></pre> <p>source</p> <p># <code>TidierDB.@slice_min</code> \u2014 Macro.</p> <pre><code>@slice_min(sql_query, column, n = 1)\n</code></pre> <p>Select rows with the smallest values in specified column. This will always return ties. </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>column</code>: Column to identify the smallest values.</li> <li><code>n</code>: The number of rows to select with the smallest values for each specified column. Default is 1, which selects the row with the smallest value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @slice_min(value, n = 2)\n         @collect\n       end;\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @slice_min(value)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   rank_col \n     \u2502 String?  String?  Int64?  Float64?  Int64?   
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1         1\n   2 \u2502 AF       aa            1       0.6         1\n</code></pre> <p>source</p> <p># <code>TidierDB.@slice_sample</code> \u2014 Macro.</p> <pre><code>@slice_sample(sql_query, n)\n</code></pre> <p>Randomly select a specified number of rows from a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>n</code>: The number of rows to randomly select.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @slice_sample(n = 2)\n         @collect\n       end;\n\njulia&gt; @chain db_table(db, :df_mem) begin\n       @slice_sample()\n       @collect\n       end;\n</code></pre> <p>source</p> <p># <code>TidierDB.@summarise</code> \u2014 Macro.</p> <pre><code>   @summarise(sql_query, exprs...)\n</code></pre> <p>Aggregate and summarize specified columns of a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions defining the aggregation and summarization operations. 
These can specify simple aggregations like mean, sum, and count, or more complex expressions involving existing column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(across((value:percent), (mean, sum)))\n         @arrange(groups)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 groups   mean_value  mean_percent  sum_value  sum_percent \n     \u2502 String?  Float64?    Float64?      Int128?    Float64?    \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa              3.0           0.6         15          3.0\n   2 \u2502 bb              3.0           0.5         15          2.5\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(test = sum(percent), n = n())\n         @arrange(groups)\n         @collect\n       end\n2\u00d73 DataFrame\n Row \u2502 groups   test      n      \n     \u2502 String?  Float64?  Int64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            3.0       5\n   2 \u2502 bb            2.5       5\n</code></pre> <p>source</p> <p># <code>TidierDB.@summarize</code> \u2014 Macro.</p> <pre><code>   @summarize(sql_query, exprs...)\n</code></pre> <p>Aggregate and summarize specified columns of a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions defining the aggregation and summarization operations. These can specify simple aggregations like mean, sum, and count, or more complex expressions involving existing column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(across((ends_with(\"e\"), starts_with(\"p\")), (mean, sum)))\n         @arrange(groups)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 groups   mean_value  mean_percent  sum_value  sum_percent \n     \u2502 String?  Float64?    Float64?      Int128?    Float64?    
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa              3.0           0.6         15          3.0\n   2 \u2502 bb              3.0           0.5         15          2.5\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(test = sum(percent), n = n())\n         @arrange(groups)\n         @collect\n       end\n2\u00d73 DataFrame\n Row \u2502 groups   test      n      \n     \u2502 String?  Float64?  Int64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            3.0       5\n   2 \u2502 bb            2.5       5\n</code></pre> <p>source</p> <p># <code>TidierDB.@window_frame</code> \u2014 Macro.</p> <pre><code>@window_frame(sql_query, frame_start::Int, frame_end::Int)\n</code></pre> <p>Define the window frame for window functions in a SQL query, specifying the range of rows to include in the calculation relative to the current row.</p> <p>Arguments</p> <p>sql_query: The SQL query to operate on, expected to be an instance of SQLQuery.</p> <ul> <li><code>frame_start</code>: The starting point of the window frame. A positive value indicates the start after the current row (FOLLOWING), a negative value indicates before the current row (PRECEDING), and 0 indicates the current row.</li> <li><code>frame_end</code>: The ending point of the window frame. 
A positive value indicates the end after the current row (FOLLOWING), a negative value indicates before the current row (PRECEDING), and 0 indicates the current row.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n</code></pre> <p>source</p> <p># <code>TidierDB.@window_order</code> \u2014 Macro.</p> <pre><code>   @window_order(sql_query, columns...)\n</code></pre> <p>Specify the order of rows for window functions within a SQL query.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>columns</code>: Columns to order the rows by for the window function. Can include multiple columns for nested sorting. Prepend a column name with - for descending order.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n</code></pre> <p>source</p> <p></p> <p></p>"},{"location":"reference/#reference-internal-functions","title":"Reference - Internal functions","text":""},{"location":"examples/generated/UserGuide/Snowflake/","title":"Using Snowflake","text":"<p>Establishing a connection with the Snowflake SQL Rest API requires an OAuth token specific to the Role the user will use to query tables.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/Snowflake/#connecting","title":"Connecting","text":"<p>Connection is established with the <code>connect</code> function as shown below. Connection requires 5 items as strings</p> <ul> <li>Account Identifier</li> <li>OAuth token</li> <li>Database Name</li> <li>Schema Name</li> <li>Compute Warehouse name</li> </ul> <p>Two things to note:</p> <ul> <li>Your OAuth Token may frequently expire, which may require you to rerun your connection line.</li> <li> <p>Since each time <code>db_table</code> runs, it runs a query to pull the metadata, you may choose to run <code>db_table</code> and save the results, and use these results with <code>from_query()</code></p> <ul> <li>This will reduce the number of queries to your database</li> <li>Allow you to build a SQL query and <code>@show_query</code> even if the OAuth token has expired. 
To <code>@collect</code> you will have to reconnect and rerun <code>db_table</code> if your OAuth token has expired</li> </ul> </li> </ul> <pre><code>set_sql_mode(snowflake())\nac_id = \"string_id\"\ntoken = \"OAuth_token_string\"\ncon = connect(:snowflake, ac_id, token, \"DEMODB\", \"PUBLIC\", \"COMPUTE_WH\")\n# After connection is established, you may begin querying.\nstable_table_metadata = db_table(con, \"MTCARS\")\n@chain from_query(stable_table_metadata) begin\n   @select(WT)\n   @mutate(TEST = WT *2)\n   #@aside @show_query _\n   @collect\nend\n</code></pre> <pre><code>32\u00d72 DataFrame\n Row \u2502 WT       TEST\n     \u2502 Float64  Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502   2.62     5.24\n   2 \u2502   2.875    5.75\n   3 \u2502   2.32     4.64\n   4 \u2502   3.215    6.43\n  \u22ee  \u2502    \u22ee        \u22ee\n  29 \u2502   3.17     6.34\n  30 \u2502   2.77     5.54\n  31 \u2502   3.57     7.14\n  32 \u2502   2.78     5.56\n         24 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/athena/","title":"Using Athena","text":"<p>Setup for the Athena AWS backend with TidierDB, along with a small syntax difference, is covered here.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/athena/#connecting","title":"Connecting","text":"<p>Connection is established through AWS.jl as shown below.</p> <pre><code>using TidierDB, AWS\nset_sql_mode(athena())\n# Replace your credentials as needed below\naws_access_key_id = get(ENV,\"AWS_ACCESS_KEY_ID\",\"key\")\naws_secret_access_key = get(ENV, \"AWS_SECRET_ACCESS_KEY\",\"secret_key\")\naws_region = get(ENV,\"AWS_DEFAULT_REGION\",\"region\")\n\nconst AWS_GLOBAL_CONFIG = Ref{AWS.AWSConfig}()\ncreds = AWSCredentials(aws_access_key_id, aws_secret_access_key)\n\nAWS_GLOBAL_CONFIG[] = AWS.global_aws_config(region=aws_region, 
creds=creds)\n\ncatalog = \"AwsDataCatalog\"\nworkgroup = \"primary\"\ndb = \"demodb\"\nall_results = true\nresults_per_increment = 10\nout_loc = \"s3://location/\"\n\nathena_params = Dict(\n    \"ResultConfiguration\" =&gt; Dict(\n        \"OutputLocation\" =&gt; out_loc\n    ),\n    \"QueryExecutionContext\" =&gt; Dict(\n        \"Database\" =&gt; db,\n        \"Catalog\" =&gt; catalog\n    ),\n    \"Workgroup\" =&gt; workgroup\n)\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/athena/#db_table-differences","title":"<code>db_table</code> differences","text":"<p>There are two differences for <code>db_table</code> which are seen in the query below</p> <ol> <li>The table needs to be passed as a string in the format database.table, ie <code>\"demodb.table_name</code></li> <li><code>db_table</code> requires a third argument: the athena_params from above.</li> </ol> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/athena/#leveraging-from_query-with-athena-to-reduce-number-of-queries","title":"Leveraging <code>from_query</code> with Athena to reduce number of queries","text":"<p>Throughout TidierDB, each time <code>db_table</code> is called, it queries the databases to get the metadata. Consider how AWS Athena logs queries, a user may want to reduce the number of queries. 
This can be done by saving the results of <code>db_table</code>, and then using <code>from_query</code> with those results for further queries as shown below.</p> <pre><code>mtcars = db_table(AWS_GLOBAL_CONFIG[], \"demodb.mtcars\", athena_params)\n@chain from_query(mtcars) begin\n    @filter(cyl &gt; 4)\n    @group_by(cyl)\n    @summarize(mpg = mean(mpg))\n   #@show_query\n    @collect\nend\n</code></pre> <pre><code>2\u00d72 DataFrame\n Row \u2502 cyl    mpg\n     \u2502 Int64  Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6  19.7429\n   2 \u2502     8  15.1\n</code></pre> <p>I would like to acknowledge the work of Manu Francis and this blog post, which helped guide this process.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/databricks/","title":"Using Databricks","text":"<p>Establishing a connection with the Databricks SQL Rest API requires a token.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/databricks/#connecting","title":"Connecting","text":"<p>Connection is established with the <code>connect</code> function as shown below. Connection requires 5 items as strings</p> <ul> <li>Account Instance : how to find your instance</li> <li>OAuth token : how to generate your token</li> <li>Database Name</li> <li>Schema Name</li> <li>warehouse_id</li> </ul> <p>One thing to note: since each time <code>db_table</code> runs, it runs a query to pull the metadata, you may choose to run <code>db_table</code> and save the results, and use these results with <code>from_query()</code>. 
This will reduce the number of queries to your database and is illustrated below.</p> <pre><code>set_sql_mode(databricks())\ninstance_id = \"string_id\"\ntoken = \"string_token\"\nwarehouse_id = \"e673cd4f387f964a\"\ncon = connect(:databricks, instance_id, token, \"DEMODB\", \"PUBLIC\", warehouse_id)\n# After connection is established, you may begin querying.\nstable_table_metadata = db_table(con, \"mtcars\")\n@chain from_query(stable_table_metadata) begin\n   @select(wt)\n   @mutate(test = wt *2)\n   #@aside @show_query _\n   @collect\nend\n</code></pre> <pre><code>32\u00d72 DataFrame\n Row \u2502 wt       test\n     \u2502 Float64  Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502   2.62     5.24\n   2 \u2502   2.875    5.75\n   3 \u2502   2.32     4.64\n   4 \u2502   3.215    6.43\n  \u22ee  \u2502    \u22ee        \u22ee\n  29 \u2502   3.17     6.34\n  30 \u2502   2.77     5.54\n  31 \u2502   3.57     7.14\n  32 \u2502   2.78     5.56\n         24 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/from_queryex/","title":"Reusing Part of a Query","text":"<p>While using TidierDB, you may need to generate part of a query and reuse it multiple times. <code>from_query()</code> enables a query portion to be reused multiple times as shown below.</p> <pre><code>import TidierDB as DB\ncon = DB.connect(duckdb())\nmtcars_path = \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\"\n</code></pre> <p>Start a query to analyze fuel efficiency by number of cylinders. 
However, to further build on this query later, end the chain without using <code>@show_query</code> or <code>@collect</code></p> <pre><code>query = DB.@chain DB.db_table(con, mtcars_path) begin\n    DB.@group_by cyl\n    DB.@summarize begin\n        across(mpg, (mean, minimum, maximum))\n        num_cars = n()\n        end\n    DB.@mutate begin\n        efficiency = case_when(\n            mean_mpg &gt;= 25, \"High\",\n            mean_mpg &gt;= 15, \"Moderate\",\n            \"Low\" )\n       end\nend;\n</code></pre> <p>Now, <code>from_query</code> will allow you to reuse the query to calculate the average horsepower for each efficiency category</p> <pre><code>DB.@chain DB.from_query(query) begin\n   DB.@left_join(mtcars2, cyl, cyl)\n   DB.@group_by(efficiency)\n   DB.@summarize(avg_hp = mean(hp))\n   DB.@collect\nend\n</code></pre> <pre><code>2\u00d72 DataFrame\n Row \u2502 efficiency  avg_hp\n     \u2502 String?     Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Moderate    180.238\n   2 \u2502 High         82.6364\n</code></pre> <p>Reuse the query again to find the car with the highest MPG for each cylinder category</p> <pre><code>DB.@chain DB.from_query(query) begin\n   DB.@left_join(mtcars2, cyl, cyl)\n   DB.@group_by cyl\n   DB.@slice_max(mpg)\n   DB.@select model cyl mpg\n   DB.@collect\nend\n</code></pre> <pre><code>3\u00d73 DataFrame\n Row \u2502 model             cyl     mpg\n     \u2502 String?           Int64?  
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Pontiac Firebird       8      19.2\n   2 \u2502 Toyota Corolla         4      33.9\n   3 \u2502 Hornet 4 Drive         6      21.4\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/from_queryex/#preview-or-save-an-intermediate-table","title":"Preview or save an intermediate table","text":"<p>While querying a dataset, you may wish to see an intermediate table, or even save it. You can use <code>@aside</code> and <code>from_query(_)</code>, illustrated below, to do just that. While we opted to print the results in this simple example below, we could have saved them by using <code>name = DB.@chain...</code></p> <pre><code>import ClickHouse;\nconn = conn = DB.connect(DB.clickhouse(); host=\"localhost\", port=19000, database=\"default\", user=\"default\", password=\"\")\npath = \"https://huggingface.co/datasets/maharshipandya/spotify-tracks-dataset/resolve/refs%2Fconvert%2Fparquet/default/train/0000.parquet\"\nDB.@chain DB.db_table(conn, path) begin\n   DB.@count(cyl)\n   @aside println(DB.@chain DB.from_query(_) DB.@head(5) DB.@collect)\n   DB.@arrange(desc(count))\n   DB.@collect\nend\n</code></pre> <pre><code>5\u00d72 DataFrame\n Row \u2502 artists  count\n     \u2502 String?  UInt64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing       1\n   2 \u2502 Wizo          3\n   3 \u2502 MAGIC!        3\n   4 \u2502 Macaco        1\n   5 \u2502 SOYOU         1\n31438\u00d72 DataFrame\n   Row \u2502 artists          count\n       \u2502 String?          
UInt64\n\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n     1 \u2502 The Beatles         279\n     2 \u2502 George Jones        271\n     3 \u2502 Stevie Wonder       236\n     4 \u2502 Linkin Park         224\n     5 \u2502 Ella Fitzgerald     222\n     6 \u2502 Prateek Kuhad       217\n     7 \u2502 Feid                202\n   \u22ee   \u2502        \u22ee           \u22ee\n 31432 \u2502 Leonard               1\n 31433 \u2502 marcos g              1\n 31434 \u2502 BLVKSHP               1\n 31435 \u2502 Memtrix               1\n 31436 \u2502 SOYOU                 1\n 31437 \u2502 Macaco                1\n 31438 \u2502 missing               1\n               31424 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/functions_pass_to_DB/","title":"Writing Functions/Macros with TidierDB Chains","text":"<p>How can functions pass arguments to a TidierDB chain?</p> <p>In short, you have to use a macro instead, in conjunction with <code>@interpolate</code></p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/functions_pass_to_DB/#setting-up-the-macro","title":"Setting up the macro","text":"<p>To write a macro that will take arguments and pass them to a TidierDB chain, there are 3 steps:</p> <ol> <li>Write a macro with the desired argument(s), and, after the quote, add the chain. Arguments to be changed/interpolated must be prefixed with <code>!!</code></li> <li>Use <code>@interpolate</code> to make these arguments accessible to the chain. 
<code>@interpolate</code> takes tuples as arguments (one for the <code>!!</code> name, and one for the actual content you want the chain to use)</li> <li>Run <code>@interpolate</code> and then the chain macro sequentially</li> </ol> <pre><code>using TidierDB\ndb = connect(duckdb())\npath = \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\"\ncopy_to(db, path, \"mtcars\");\n\n# STEP 1\nmacro f1(conditions, columns) # The argument names will be names of the `!!` values\n    return quote\n    # add chain here\n      @chain db_table(db, :mtcars) begin\n           @filter(!!conditions &gt; 3)\n           @select(!!columns)\n           @aside @show_query _\n           @collect\n         end # ends the chain\n    end # ends the quote.\nend # ends the macro\n</code></pre> <pre><code># STEP 2\nvariable = :gear;\ncols = [:model, :mpg, :gear, :wt];\n@interpolate((conditions, variable), (columns, cols));\n@f1(variable, cols)\n</code></pre> <pre><code>17\u00d74 DataFrame\n Row \u2502 model           mpg       gear    wt\n     \u2502 String?         Float64?  Int32?  
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4           21.0       4     2.62\n   2 \u2502 Mazda RX4 Wag       21.0       4     2.875\n   3 \u2502 Datsun 710          22.8       4     2.32\n  \u22ee  \u2502       \u22ee            \u22ee        \u22ee        \u22ee\n  15 \u2502 Ferrari Dino        19.7       5     2.77\n  16 \u2502 Maserati Bora       15.0       5     3.57\n  17 \u2502 Volvo 142E          21.4       4     2.78\n                                   11 rows omitted\n</code></pre> <p>Lets say you wanted to filter on new variable with a different name and select new columns,</p> <pre><code>new_condition = :wt;\nnew_cols = [:model, :drat]\n@interpolate((conditions, new_condition), (columns, new_cols));\n@f1(new_condition, new_cols)\n</code></pre> <pre><code>20\u00d72 DataFrame\n Row \u2502 model              drat\n     \u2502 String?            
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Hornet 4 Drive         3.08\n   2 \u2502 Hornet Sportabout      3.15\n   3 \u2502 Valiant                2.76\n  \u22ee  \u2502         \u22ee             \u22ee\n  18 \u2502 Pontiac Firebird       3.08\n  19 \u2502 Ford Pantera L         4.22\n  20 \u2502 Maserati Bora          3.54\n                    14 rows omitted\n</code></pre> <p>You can also interpolate vectors of strings into a <code>@filter(col in (values))</code> as well by using the following syntax <code>@filter(col in [!!values])</code></p> <p>In short, the first argument in <code>@interpolate</code> must be the name of the macro argument it refers to, and the second argument is what you would like to replace it.</p> <p>We recognize this adds friction and that it is not ideal, but given the TidierDB macro expressions/string interplay, this is currently the most graceful and functional option available and hopefully a temporary solution to better interpolation that mirrors TidierData.jl.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/getting_started/","title":"Getting Started","text":"<p>To use TidierDB.jl, you will have to set up a connection. TidierDB.jl gives you access to duckdb via <code>duckdb_open</code> and <code>duckdb_connect</code>. 
However, to use MySQL, ClickHouse, MSSQL, Postgres, or SQLite, you will have to load those packages first.</p> <p>If you plan to use TidierDB.jl with TidierData.jl or Tidier.jl, it is most convenient to load the packages as follows:</p> <pre><code>using TidierData\nimport TidierDB as DB\n</code></pre> <p>Alternatively, <code>using Tidier</code> will import TidierDB in the above manner for you, where TidierDB functions and macros will be available as <code>DB.@mutate()</code> and so on, and the TidierData equivalent would be <code>@mutate()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#connecting","title":"Connecting","text":"<p>To connect to a database, you can use the <code>connect</code> function as shown below, or establish your own connection through the respective libraries.</p> <p>For example, connecting to MySQL</p> <pre><code>conn = DB.connect(DB.mysql(); host=\"localhost\", user=\"root\", password=\"password\", db=\"mydb\")\n</code></pre> <p>versus connecting to DuckDB</p> <pre><code>conn = DB.connect(DB.duckdb())\n</code></pre> <p>You can also establish a connection through an alternate method that you prefer, and use that as your connection as well.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#package-extensions","title":"Package Extensions","text":"<p>The following backends utilize package extensions. 
To use one of the backends listed below, you will need to write <code>using Library</code></p> <ul> <li>ClickHouse: <code>import ClickHouse</code></li> <li>MySQL and MariaDB: <code>using MySQL</code></li> <li>MSSQL: <code>using ODBC</code></li> <li>Postgres: <code>using LibPQ</code></li> <li>SQLite: <code>using SQLite</code></li> <li>Athena: <code>using AWS</code></li> <li>Oracle: <code>using ODBC</code></li> <li>Google BigQuery: <code>using GoogleCloud</code></li> </ul> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#db_table","title":"<code>db_table</code>","text":"<p>What does <code>db_table</code> do?</p> <p><code>db_table</code> starts the underlying SQL query struct, in addition to pulling the table metadata and storing it there. Storing metadata is what enables a lazy interface that also supports tidy selection.</p> <ul> <li><code>db_table</code> has two required arguments: <code>connection</code> and <code>table</code></li> <li><code>table</code> can be a table name on a database or a path/url to a file to read. When passing <code>db_table</code> a path or url, the table is not copied into memory.</li> <li>With DuckDB and ClickHouse, if you have a folder of multiple files to read, you can use <code>*</code> to read in all files matching the pattern.</li> <li>For example, the below would read all files that end in <code>.csv</code> in the given folder.</li> </ul> <pre><code>db_table(db, \"folder/path/*.csv\")\n</code></pre> <p><code>db_table</code> also supports iceberg, delta, and S3 file paths via DuckDB.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#minimizing-compute-costs","title":"Minimizing Compute Costs","text":"<p>If you are working with a backend where compute cost is important, it will be important to minimize using <code>db_table</code> as this will requery for metadata each time. 
Compute costs are relevant to backends such as AWS, databricks and Snowflake.</p> <p>To do this, save the results of <code>db_table</code> and use them with <code>from_query</code>. Using <code>from_query</code> pulls the relevant information (metadata, con, etc) from the mutable SQLquery struct, allowing you to repeatedly query and collect the table without requerying for the metadata each time</p> <pre><code>table = DB.db_table(con, \"path\")\n@chain DB.from_query(table) begin\n    ## data wrangling here\nend\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/ibis_comp/","title":"TidierDB.jl vs Ibis","text":""},{"location":"examples/generated/UserGuide/ibis_comp/#comparing-tidierdb-vs-ibis","title":"Comparing TidierDB vs Ibis","text":"<p>TidierDB is a reimplementation of dbplyr from R, so the syntax is remarkably similar. But how does TidierDB compare to Python's Ibis? This page will perform a similar comparison to the Ibis Documentation comparing Ibis and dplyr</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#set-up","title":"Set up","text":"<p>Ibis</p> <pre><code>import ibis\nimport ibis.selectors as s # allows for different styles of column selection\nfrom ibis import _ # eliminates need to type table name before each column vs typing cols as strings\nibis.options.interactive = True # automatically collects first 10 rows of table\n</code></pre> <p>TidierDB</p> <pre><code>using TidierDB\ndb = connect(duckdb())\n# This next line is optional, but it will let us avoid writing `db_table` or `from_query` for each query\nt(table) = from_query(table)\n</code></pre> <p>Of note, TidierDB does not yet have an \"interactive mode\" so each example result will be collected.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#loading-data","title":"Loading Data","text":"<p>With Ibis, there are specific functions to read in different file types</p> <pre><code>mtcars = 
ibis.read_csv(\"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\")\n</code></pre> <p>In TidierDB, there is only <code>db_table</code>, which determines the file type and generates the syntax appropriate for the backend in use.</p> <pre><code>mtcars = db_table(db, \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#previewing-the-data","title":"Previewing the data","text":"<p>TidierDB and Ibis use <code>head</code>/<code>@head</code> to preview the first rows of a dataset.</p> <pre><code>mtcars.head(6)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model             \u2503 mpg     \u2503 cyl   \u2503 disp    \u2503 hp    \u2503 drat    \u2503 wt      \u2503 qsec    \u2503 vs    \u2503 am    \u2503 gear  \u2503 carb  
\u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502 float64 \u2502 int64 \u2502 float64 \u2502 int64 \u2502 float64 \u2502 float64 \u2502 float64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502    21.0 \u2502     6 \u2502   160.0 \u2502   110 \u2502    3.90 \u2502   2.620 \u2502   16.46 \u2502     0 \u2502     1 \u2502     4 \u2502     4 \u2502\n\u2502 Mazda RX4 Wag     \u2502    21.0 \u2502     6 \u2502   160.0 \u2502   110 \u2502    3.90 \u2502   2.875 \u2502   17.02 \u2502     0 \u2502     1 \u2502     4 \u2502     4 
\u2502\n\u2502 Datsun 710        \u2502    22.8 \u2502     4 \u2502   108.0 \u2502    93 \u2502    3.85 \u2502   2.320 \u2502   18.61 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Hornet 4 Drive    \u2502    21.4 \u2502     6 \u2502   258.0 \u2502   110 \u2502    3.08 \u2502   3.215 \u2502   19.44 \u2502     1 \u2502     0 \u2502     3 \u2502     1 \u2502\n\u2502 Hornet Sportabout \u2502    18.7 \u2502     8 \u2502   360.0 \u2502   175 \u2502    3.15 \u2502   3.440 \u2502   17.02 \u2502     0 \u2502     0 \u2502     3 \u2502     2 \u2502\n\u2502 Valiant           \u2502    18.1 \u2502     6 \u2502   225.0 \u2502   105 \u2502    2.76 \u2502   3.460 \u2502   20.22 \u2502     1 \u2502     0 \u2502     3 \u2502     1 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <pre><code>@chain t(mtcars) @head(6) @collect\n</code></pre> <pre><code>6\u00d712 DataFrame\n Row \u2502 model              mpg       cyl     disp      hp      drat      wt        qsec      vs      am      gear    carb\n     \u2502 String?            Float64?  Int64?  Float64?  Int64?  Float64?  Float64?  Float64?  Int64?  Int64?  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4              21.0       6     160.0     110      3.9      2.62      16.46       0       1       4       4\n   2 \u2502 Mazda RX4 Wag          21.0       6     160.0     110      3.9      2.875     17.02       0       1       4       4\n   3 \u2502 Datsun 710             22.8       4     108.0      93      3.85     2.32      18.61       1       1       4       1\n   4 \u2502 Hornet 4 Drive         21.4       6     258.0     110      3.08     3.215     19.44       1       0       3       1\n   5 \u2502 Hornet Sportabout      18.7       8     360.0     175      3.15     3.44      17.02       0       0       3       2\n   6 \u2502 Valiant                18.1       6     225.0     105      2.76     3.46      20.22       1       0       3       1\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#filtering","title":"Filtering","text":"<p>The example below demonstrates how to filter using multiple criteria in both Ibis and TidierData</p> <pre><code>mtcars.filter(((_.mpg &gt; 22) &amp; (_.drat &gt; 4) | (_.hp == 113)))\n</code></pre> 
<pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model          \u2503 mpg     \u2503 cyl   \u2503 disp    \u2503 hp    \u2503 drat    \u2503 wt      \u2503 qsec    \u2503 vs    \u2503 am    \u2503 gear  \u2503 carb  \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string         \u2502 float64 \u2502 int64 \u2502 float64 \u2502 int64 \u2502 float64 \u2502 float64 \u2502 float64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502 int64 
\u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Lotus Europa   \u2502    30.4 \u2502     4 \u2502    95.1 \u2502   113 \u2502    3.77 \u2502   1.513 \u2502   16.90 \u2502     1 \u2502     1 \u2502     5 \u2502     2 \u2502\n\u2502 Fiat 128       \u2502    32.4 \u2502     4 \u2502    78.7 \u2502    66 \u2502    4.08 \u2502   2.200 \u2502   19.47 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Honda Civic    \u2502    30.4 \u2502     4 \u2502    75.7 \u2502    52 \u2502    4.93 \u2502   1.615 \u2502   18.52 \u2502     1 \u2502     1 \u2502     4 \u2502     2 \u2502\n\u2502 Toyota Corolla \u2502    33.9 \u2502     4 \u2502    71.1 \u2502    65 \u2502    4.22 \u2502   1.835 \u2502   19.90 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Fiat X1-9      \u2502    27.3 \u2502     4 \u2502    79.0 \u2502    66 \u2502    4.08 \u2502   1.935 \u2502   18.90 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Porsche 914-2  \u2502    26.0 \u2502     4 \u2502   120.3 \u2502    91 \u2502    4.43 \u2502   2.140 \u2502   16.70 \u2502     0 \u2502     1 \u2502     5 \u2502     2 
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>TidierDB</p> <pre><code>@chain t(mtcars) begin\n       @filter((mpg &gt; 22 &amp;&amp; drat &gt; 4) || hp == 113)\n       @collect\nend\n</code></pre> <pre><code>6\u00d712 DataFrame\n Row \u2502 model           mpg       cyl     disp      hp      drat      wt        qsec      vs      am      gear    carb\n     \u2502 String?         Float64?  Int64?  Float64?  Int64?  Float64?  Float64?  Float64?  Int64?  Int64?  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Lotus Europa        30.4       4      95.1     113      3.77     1.513     16.9        1       1       5       2\n   2 \u2502 Fiat 128            32.4       4      78.7      66      4.08     2.2       19.47       1       1       4       1\n   3 \u2502 Honda Civic         30.4       4      75.7      52      4.93     1.615     18.52       1       1       4       2\n   4 \u2502 Toyota Corolla      33.9       4      71.1      65      4.22     1.835     19.9        1       1       4       1\n   5 \u2502 Fiat X1-9           27.3       4      79.0      66      4.08     1.935     18.9        1       1       4       1\n   6 \u2502 Porsche 914-2       26.0       4     120.3      91      4.43     2.14      16.7        0       1       5       2\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#creating-new-columns","title":"Creating new columns","text":"<p>Both TidierDB and Ibis use <code>mutate</code>/<code>@mutate</code> to add new columns</p> <pre><code>(\n   mtcars\n        .mutate(kpg = _.mpg * 1.61)\n        .select(\"model\", \"kpg\")\n)\n</code></pre> 
<pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model             \u2503 kpg     \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502 float64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502  33.810 \u2502\n\u2502 Mazda RX4 Wag     \u2502  33.810 \u2502\n\u2502 Datsun 710        \u2502  36.708 \u2502\n\u2502 Hornet 4 Drive    \u2502  34.454 \u2502\n\u2502 Hornet Sportabout \u2502  30.107 \u2502\n\u2502 Valiant           \u2502  29.141 \u2502\n\u2502 Duster 360        \u2502  23.023 \u2502\n\u2502 Merc 240D         \u2502  39.284 \u2502\n\u2502 Merc 230          \u2502  36.708 \u2502\n\u2502 Merc 280          \u2502  30.912 \u2502\n\u2502 \u2026                 \u2502       \u2026 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <pre><code>@chain t(mtcars) begin\n       @mutate(kpg = mpg * 1.61)\n       @select(model, kpg)\n       @collect\nend\n</code></pre> <pre><code>32\u00d72 DataFrame\n Row \u2502 model              kpg\n     \u2502 String?            
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4            33.81\n   2 \u2502 Mazda RX4 Wag        33.81\n   3 \u2502 Datsun 710           36.708\n   4 \u2502 Hornet 4 Drive       34.454\n   5 \u2502 Hornet Sportabout    30.107\n   6 \u2502 Valiant              29.141\n  \u22ee  \u2502         \u22ee             \u22ee\n  27 \u2502 Porsche 914-2        41.86\n  28 \u2502 Lotus Europa         48.944\n  29 \u2502 Ford Pantera L       25.438\n  30 \u2502 Ferrari Dino         31.717\n  31 \u2502 Maserati Bora        24.15\n  32 \u2502 Volvo 142E           34.454\n                    20 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#sorting-columns","title":"Sorting columns","text":"<p>Ibis uses <code>order_by</code> similar to SQLs <code>ORDER BY</code></p> <pre><code>mtcars.order_by(_.mpg)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model               \u2503 mpg     \u2503 cyl   \u2503 disp    \u2503 hp    \u2503 drat    \u2503 wt      \u2503 qsec    \u2503 vs    \u2503 am    \u2503 gear  \u2503 carb  
\u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string              \u2502 float64 \u2502 int64 \u2502 float64 \u2502 int64 \u2502 float64 \u2502 float64 \u2502 float64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Cadillac Fleetwood  \u2502    10.4 \u2502     8 \u2502   472.0 \u2502   205 \u2502    2.93 \u2502   5.250 \u2502   17.98 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Lincoln Continental \u2502    10.4 \u2502     8 \u2502   460.0 \u2502   215 \u2502    3.00 \u2502   5.424 \u2502   17.82 \u2502     0 \u2502     0 
\u2502     3 \u2502     4 \u2502\n\u2502 Camaro Z28          \u2502    13.3 \u2502     8 \u2502   350.0 \u2502   245 \u2502    3.73 \u2502   3.840 \u2502   15.41 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Duster 360          \u2502    14.3 \u2502     8 \u2502   360.0 \u2502   245 \u2502    3.21 \u2502   3.570 \u2502   15.84 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Chrysler Imperial   \u2502    14.7 \u2502     8 \u2502   440.0 \u2502   230 \u2502    3.23 \u2502   5.345 \u2502   17.42 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Maserati Bora       \u2502    15.0 \u2502     8 \u2502   301.0 \u2502   335 \u2502    3.54 \u2502   3.570 \u2502   14.60 \u2502     0 \u2502     1 \u2502     5 \u2502     8 \u2502\n\u2502 Merc 450SLC         \u2502    15.2 \u2502     8 \u2502   275.8 \u2502   180 \u2502    3.07 \u2502   3.780 \u2502   18.00 \u2502     0 \u2502     0 \u2502     3 \u2502     3 \u2502\n\u2502 AMC Javelin         \u2502    15.2 \u2502     8 \u2502   304.0 \u2502   150 \u2502    3.15 \u2502   3.435 \u2502   17.30 \u2502     0 \u2502     0 \u2502     3 \u2502     2 \u2502\n\u2502 Dodge Challenger    \u2502    15.5 \u2502     8 \u2502   318.0 \u2502   150 \u2502    2.76 \u2502   3.520 \u2502   16.87 \u2502     0 \u2502     0 \u2502     3 \u2502     2 \u2502\n\u2502 Ford Pantera L      \u2502    15.8 \u2502     8 \u2502   351.0 \u2502   264 \u2502    4.22 \u2502   3.170 \u2502   14.50 \u2502     0 \u2502     1 \u2502     5 \u2502     4 \u2502\n\u2502 \u2026                   \u2502       \u2026 \u2502     \u2026 \u2502       \u2026 \u2502     \u2026 \u2502       \u2026 \u2502       \u2026 \u2502       \u2026 \u2502     \u2026 \u2502     \u2026 \u2502     \u2026 \u2502     \u2026 
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>While TidierDB uses <code>@arrange</code> like TidierData.jl</p> <pre><code>@chain t(mtcars) @arrange(mpg) @collect\n</code></pre> <pre><code>32\u00d712 DataFrame\n Row \u2502 model                mpg       cyl     disp      hp      drat      wt        qsec      vs      am      gear    carb\n     \u2502 String?              Float64?  Int64?  Float64?  Int64?  Float64?  Float64?  Float64?  Int64?  Int64?  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Cadillac Fleetwood       10.4       8     472.0     205      2.93     5.25      17.98       0       0       3       4\n   2 \u2502 Lincoln Continental      10.4       8     460.0     215      3.0      5.424     17.82       0       0       3       4\n   3 \u2502 Camaro Z28               13.3       8     350.0     245      3.73     3.84      15.41       0       0       3       4\n   4 \u2502 Duster 360               14.3       8     360.0     245      3.21     3.57      15.84       0       0       3       4\n   5 \u2502 Chrysler Imperial        14.7       8     440.0     230      3.23     5.345     17.42       0       0       3       4\n   6 \u2502 Maserati Bora            15.0       8     301.0     335      3.54     3.57      14.6        0       1       5       8\n  \u22ee  \u2502          \u22ee              \u22ee        \u22ee        \u22ee        \u22ee        \u22ee         \u22ee         \u22ee        \u22ee       \u22ee       \u22ee       \u22ee\n  27 \u2502 Porsche 914-2            26.0       4     120.3      91      4.43     2.14      16.7        0       1       5       2\n  28 \u2502 Fiat X1-9                27.3       4      79.0      66      4.08     1.935     18.9        1       1       4       
1\n  29 \u2502 Honda Civic              30.4       4      75.7      52      4.93     1.615     18.52       1       1       4       2\n  30 \u2502 Lotus Europa             30.4       4      95.1     113      3.77     1.513     16.9        1       1       5       2\n  31 \u2502 Fiat 128                 32.4       4      78.7      66      4.08     2.2       19.47       1       1       4       1\n  32 \u2502 Toyota Corolla           33.9       4      71.1      65      4.22     1.835     19.9        1       1       4       1\n                                                                                                              20 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#selecting-columns","title":"Selecting columns","text":"<p>In Ibis, columns must be prefixed with the table name, or in this case <code>_</code>, or they can be given as a string. Finally, using helper functions like <code>startswith</code> requires importing selectors as above.</p> <pre><code>mtcars.select(s.startswith(\"m\"), \"drat\", _.wt)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model             \u2503 mpg     \u2503 drat    \u2503 wt      \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502 float64 \u2502 float64 \u2502 float64 
\u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502    21.0 \u2502    3.90 \u2502   2.620 \u2502\n\u2502 Mazda RX4 Wag     \u2502    21.0 \u2502    3.90 \u2502   2.875 \u2502\n\u2502 Datsun 710        \u2502    22.8 \u2502    3.85 \u2502   2.320 \u2502\n\u2502 Hornet 4 Drive    \u2502    21.4 \u2502    3.08 \u2502   3.215 \u2502\n\u2502 Hornet Sportabout \u2502    18.7 \u2502    3.15 \u2502   3.440 \u2502\n\u2502 Valiant           \u2502    18.1 \u2502    2.76 \u2502   3.460 \u2502\n\u2502 Duster 360        \u2502    14.3 \u2502    3.21 \u2502   3.570 \u2502\n\u2502 Merc 240D         \u2502    24.4 \u2502    3.69 \u2502   3.190 \u2502\n\u2502 Merc 230          \u2502    22.8 \u2502    3.92 \u2502   3.150 \u2502\n\u2502 Merc 280          \u2502    19.2 \u2502    3.92 \u2502   3.440 \u2502\n\u2502 \u2026                 \u2502       \u2026 \u2502       \u2026 \u2502       \u2026 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>TidierDB does not require names to be prefixed and, like TidierData, tidy column selection with <code>starts_with</code>, <code>ends_with</code>, and <code>contains</code> is supported at base. 
TidierDB also supports providing column names as strings, although this would only be needed in the setting of renaming a column with a space in it.</p> <pre><code>@chain t(mtcars) @select(starts_with(\"m\"), \"drat\", wt) @collect\n</code></pre> <pre><code>32\u00d72 DataFrame\n32\u00d74 DataFrame\n Row \u2502 model              mpg       drat      wt\n     \u2502 String?            Float64?  Float64?  Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4              21.0      3.9      2.62\n   2 \u2502 Mazda RX4 Wag          21.0      3.9      2.875\n   3 \u2502 Datsun 710             22.8      3.85     2.32\n   4 \u2502 Hornet 4 Drive         21.4      3.08     3.215\n   5 \u2502 Hornet Sportabout      18.7      3.15     3.44\n   6 \u2502 Valiant                18.1      2.76     3.46\n  \u22ee  \u2502         \u22ee             \u22ee         \u22ee         \u22ee\n  27 \u2502 Porsche 914-2          26.0      4.43     2.14\n  28 \u2502 Lotus Europa           30.4      3.77     1.513\n  29 \u2502 Ford Pantera L         15.8      4.22     3.17\n  30 \u2502 Ferrari Dino           19.7      3.62     2.77\n  31 \u2502 Maserati Bora          15.0      3.54     3.57\n  32 \u2502 Volvo 142E             21.4      4.11     2.78\n                                        20 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#multi-step-queries-and-summarizing","title":"Multi step queries and summarizing","text":"<p>Aggregating data is done with <code>aggregate</code> in ibis and <code>@summarize</code> in TidierDB. There is a slight difference in grouping data. 
Ibis uses <code>by =</code> within the <code>aggregate</code> call, whereas TidierDB adheres to the <code>@group_by</code> convention</p> <pre><code>mtcars.aggregate(\n    total_hp=_.hp.sum(),\n    avg_hp=_.hp.mean(),\n    having=_.hp.sum() &lt; 1000,\n    by=['cyl']\n)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 cyl   \u2503 total_hp \u2503 avg_hp     \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 int64 \u2502 int64    \u2502 float64    \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502     6 \u2502      856 \u2502 122.285714 \u2502\n\u2502     4 \u2502      909 \u2502  82.636364 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>In TidierDB, <code>@filter</code> will automatically determine whether the criteria belong in a WHERE or HAVING clause in SQL.</p> <pre><code>@chain t(mtcars) begin\n    @group_by(cyl)\n    @summarize(total_hp = sum(hp),\n               avg_hp = avg(hp))\n    @filter(total_hp &lt; 1000)\n    @collect\nend\n</code></pre> <pre><code>2\u00d73 DataFrame\n Row \u2502 cyl     total_hp  avg_hp\n     \u2502 Int64?  Int128?   
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      6       856  122.286\n   2 \u2502      4       909   82.6364\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#renaming-columns","title":"Renaming columns","text":"<p>Both tools use <code>rename</code>/@rename to rename columns</p> <pre><code>mtcars.rename(make_model = \"model\").select(_.make_model)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 make_model        \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502\n\u2502 Mazda RX4 Wag     \u2502\n\u2502 Datsun 710        \u2502\n\u2502 Hornet 4 Drive    \u2502\n\u2502 Hornet Sportabout \u2502\n\u2502 Valiant           \u2502\n\u2502 Duster 360        \u2502\n\u2502 Merc 240D         \u2502\n\u2502 Merc 230          \u2502\n\u2502 Merc 280          \u2502\n\u2502 \u2026                 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <pre><code>@chain t(mtcars) @rename(model_make = model) @select(model_make) @collect\n</code></pre> <pre><code>32\u00d71 DataFrame\n Row \u2502 model_make\n     \u2502 String?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4\n   2 \u2502 Mazda RX4 Wag\n   3 \u2502 Datsun 710\n   4 \u2502 Hornet 4 Drive\n   5 \u2502 
Hornet Sportabout\n   6 \u2502 Valiant\n  \u22ee  \u2502         \u22ee\n  27 \u2502 Porsche 914-2\n  28 \u2502 Lotus Europa\n  29 \u2502 Ford Pantera L\n  30 \u2502 Ferrari Dino\n  31 \u2502 Maserati Bora\n  32 \u2502 Volvo 142E\n          20 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/key_differences/","title":"Key Differences from TidierData.jl","text":"<p>There are a few important syntax and behavior differences between TidierDB.jl and TidierData.jl outlined below.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#creating-a-database","title":"Creating a database","text":"<p>For these examples we will use DuckDB, the default backend, although SQLite, Postgres, MySQL, MariaDB, MSSQL, and ClickHouse are possible. If you have an existing DuckDB connection, then this step is not required. For these examples, we will create a data frame and copy it to an in-memory DuckDB database.</p> <pre><code>using DataFrames, TidierDB\n\ndf = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9],\n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10],\n                        value = repeat(1:5, 2),\n                        percent = 0.1:0.1:1.0);\n\ndb = connect(duckdb());\n\ncopy_to(db, df, \"df_mem\"); # copying over the data frame to an in-memory database\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#row-ordering","title":"Row ordering","text":"<p>DuckDB benefits from aggressive parallelization of pipelines. This means that if you have multiple threads enabled in Julia, which you can check or set using <code>Threads.nthreads()</code>, DuckDB will use multiple threads. However, because many operations are multi-threaded, the resulting row order is inconsistent. 
If row order needs to be deterministic for your use case, make sure to apply an <code>@arrange(column_name_1, column_name_2, etc...)</code> prior to collecting the results.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#starting-a-chain","title":"Starting a chain","text":"<p>When using TidierDB, <code>db_table(connection, :table_name)</code> is used to start a chain.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#grouped-mutation","title":"Grouped mutation","text":"<p>In TidierDB, when performing <code>@group_by</code> then <code>@mutate</code>, the table will be ungrouped after applying all of the mutations in the clause to the grouped data. To perform subsequent grouped operations, the user would have to regroup the data. This is demonstrated below.</p> <pre><code>@chain db_table(db, :df_mem) begin\n    @group_by(groups)\n    @summarize(mean_percent = mean(percent))\n    @collect\n end\n</code></pre> 2\u00d72 DataFrame Rowgroupsmean_percentString?Float64?1bb0.52aa0.6 <p>Regrouping following <code>@mutate</code></p> <pre><code>@chain db_table(db, :df_mem) begin\n    @group_by(groups)\n    @mutate(max = maximum(percent), min = minimum(percent))\n    @group_by(groups)\n    @summarise(mean_percent = mean(percent))\n    @collect\nend\n</code></pre> 2\u00d72 DataFrame Rowgroupsmean_percentString?Float64?1bb0.52aa0.6 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#joining","title":"Joining","text":"<p>There is one key difference for joining:</p> <p>The column on both the new and old table must be specified. They do not need to be the same, and given SQL behavior where both columns are kept when joining two tables, it is preferable if they have different names. This avoids \"ambiguous reference\" errors that would otherwise come up and complicate the use of tidy selection for columns. 
Athena has an additional slight difference given the need for parameters, which is covered in the Athena documentation page.</p> <pre><code>df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\ncopy_to(db, df2, \"df_join\");\n\n@chain db_table(db, :df_mem) begin\n    @left_join(df_join, id2, id)\n    @collect\nend\n</code></pre> 10\u00d77 DataFrame Rowidgroupsvaluepercentid2categoryscoreString?String?Int64?Float64?String?String?Int64?1AAbb10.1AAX882ACbb30.3ACY923AEbb50.5AEX774AGbb20.7AGY835AIbb40.9AIX956ABaa20.2missingmissingmissing7ADaa40.4missingmissingmissing8AFaa10.6missingmissingmissing9AHaa30.8missingmissingmissing10AJaa51.0missingmissingmissing <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#differences-in-case_when","title":"Differences in <code>case_when()</code>","text":"<p>In TidierDB, after the clause is completed, the result for the new column should be separated by a comma <code>,</code> in contrast to TidierData.jl, where the result for the new column is separated by a <code>=&gt;</code>.</p> <pre><code>@chain db_table(db, :df_mem) begin\n    @mutate(new_col = case_when(percent &gt; .5, \"Pass\",  # in TidierData, percent &gt; .5 =&gt; \"Pass\",\n                                percent &lt;= .5, \"Try Again\", # percent &lt;= .5 =&gt; \"Try Again\"\n                                true, \"middle\"))\n    @collect\n end\n</code></pre> 10\u00d75 DataFrame Rowidgroupsvaluepercentnew_colString?String?Int64?Float64?String?1AAbb10.1Try Again2ABaa20.2Try Again3ACbb30.3Try Again4ADaa40.4Try Again5AEbb50.5Try Again6AFaa10.6Pass7AGbb20.7Pass8AHaa30.8Pass9AIbb40.9Pass10AJaa51.0Pass <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#interpolation","title":"Interpolation","text":"<p>To use !! 
Interpolation, instead of being able to define the alternate names/value in the global context, the user has to use <code>@interpolate</code>. This will hopefully be fixed in future versions. Otherwise, the behavior is generally the same, although this creates friction around calling functions.</p> <p>Also, when using interpolation with exponents, the interpolated value must go inside parentheses.</p> <pre><code>@interpolate((test, :percent)); # this still supports strings, vectors of names, and values\n\n@chain db_table(db, :df_mem) begin\n    @mutate(new_col = case_when((!!test)^2 &gt; .5, \"Pass\",\n                                (!!test)^2 &lt; .5, \"Try Again\",\n                                \"middle\"))\n    @collect\nend\n</code></pre> <pre><code>10\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   new_col\n     \u2502 String?  String?  Int64?  Float64?  String?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1  Try Again\n   2 \u2502 AB       aa            2       0.2  Try Again\n   3 \u2502 AC       bb            3       0.3  Try Again\n  \u22ee  \u2502    \u22ee        \u22ee       \u22ee        \u22ee          \u22ee\n   8 \u2502 AH       aa            3       0.8  Pass\n   9 \u2502 AI       bb            4       0.9  Pass\n  10 \u2502 AJ       aa            5       1.0  Pass\n                                       4 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#slicing-ties","title":"Slicing ties","text":"<p><code>@slice_min()</code> and <code>@slice_max()</code> will always return ties due to SQL behavior.</p> <p>This page was generated using 
Literate.jl.</p>"},{"location":"examples/generated/UserGuide/outofmemex/","title":"Working With Larger than RAM Datasets","text":"<p>While using the DuckDB backend, TidierDB's lazy intferace enables querying datasets larger than your available RAM.</p> <p>To illustrate this, we will recreate the Hugging Face x Polars example. The final table results are shown below and in this Hugging Face x DuckDB example</p> <p>First we will load TidierDB, set up a local database and then set the URLs for the 2 training datasets from huggingface.co</p> <pre><code>using TidierDB\ndb = connect(duckdb())\n\nurls = [\"https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0000.parquet\",\n \"https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0001.parquet\"];\n</code></pre> <p>Here, we pass the vector of URLs to <code>db_table</code>, which will not copy them into memory. Since these datasets are so large, we will also set <code>stream = true</code> in <code>@collect</code> to stream the results. 
If we wanted to read all the files in the folder we could have replace the <code>0000</code> with <code>*</code> (wildcard) <code>db_table(db, \"Path/to/folder/*.parquet\")</code> Of note, reading these files from URLs is not as rapid as reading them from local files.</p> <pre><code>@chain db_table(db, urls) begin\n    @group_by(horoscope)\n    @summarise(count = n(), avg_blog_length = mean(length(text)))\n    @arrange(desc(count))\n    @aside @show_query _\n    @collect(stream = true)\nend\n</code></pre> <p>Placing <code>@aside @show_query _</code> before <code>@collect</code> above lets us see the SQL query and collect it to a local DataFrame at the same time.</p> <pre><code>SELECT horoscope, COUNT(*) AS count, AVG(length(text)) AS avg_blog_length\n        FROM read_parquet(['https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0000.parquet', 'https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0001.parquet'])\n        GROUP BY horoscope\n        ORDER BY avg_blog_length DESC\n12\u00d73 DataFrame\n Row \u2502 horoscope    count   avg_blog_length\n     \u2502 String?      Int64?  
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Aquarius      49568         1125.83\n   2 \u2502 Cancer        63512         1097.96\n   3 \u2502 Libra         60304         1060.61\n   4 \u2502 Capricorn     49402         1059.56\n   5 \u2502 Sagittarius   50431         1057.46\n   6 \u2502 Leo           58010         1049.6\n   7 \u2502 Taurus        61571         1022.69\n   8 \u2502 Gemini        52925         1020.26\n   9 \u2502 Scorpio       56495         1014.03\n  10 \u2502 Pisces        53812         1011.75\n  11 \u2502 Virgo         64629          996.684\n  12 \u2502 Aries         69134          918.081\n</code></pre> <p>To learn more about memory efficient queries on larger than RAM files, this blog from DuckDB will help maximize your local <code>db</code></p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/s3viaduckdb/","title":"S3 + DuckDB + TidierDB","text":"<p>TidierDB allows you leverage DuckDB's seamless database integration.</p> <p>Using DuckDB, you can connect to an AWS or GoogleCloud Database to query directly without making any local copies.</p> <p>You can also use <code>DBInterface.execute</code> to set up any DuckDB database connection you need and then use that db to query with TidierDB</p> <pre><code>using TidierDB\n\n#Connect to Google Cloud via DuckDB\n#google_db = connect(duckdb(), :gbq, access_key=\"string\", secret_key=\"string\")\n\n#Connect to AWS via DuckDB\naws_db = connect(duckdb(), :aws, aws_access_key_id= \"string\",\n                                aws_secret_access_key= \"string\",\n                                aws_region=\"us-east-1\")\ns3_csv_path = \"s3://path/to_data.csv\"\n\n@chain db_table(aws_db, s3_csv_path) begin\n    
@filter(!starts_with(column1, \"M\"))\n    @group_by(cyl)\n    @summarize(mpg = mean(mpg))\n    @mutate(mpg_squared = mpg^2,\n               mpg_rounded = round(mpg),\n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))\n    @filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    @arrange(desc(mpg_rounded))\n    @collect\nend\n</code></pre> <pre><code>2\u00d75 DataFrame\n Row \u2502 cyl     mpg       mpg_squared  mpg_rounded  mpg_efficiency\n     \u2502 Int64?  Float64?  Float64?     Float64?     String?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      4   27.3444      747.719         27.0  efficient\n   2 \u2502      6   19.7333      389.404         20.0  moderate\n</code></pre> <p>This page was generated using Literate.jl.</p>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#what-is-tidierdbjl","title":"What is TidierDB.jl?","text":"<p>TidierDB.jl is a 100% Julia implementation of the dbplyr R package, and similar to Python's ibis package.</p> <p>The main goal of TidierDB.jl is to bring the syntax of Tidier.jl to multiple SQL backends, making it possible to analyze data directly on databases without needing to copy the entire database into memory.</p> <p></p> <p></p>"},{"location":"#currently-supported-backends-include","title":"Currently supported backends include:","text":"<ul> <li>DuckDB (the default) <code>duckdb()</code></li> <li>ClickHouse <code>clickhouse()</code></li> <li>SQLite <code>sqlite()</code></li> <li>MySQL and MariaDB <code>mysql()</code></li> <li>MSSQL <code>mssql()</code></li> <li>Postgres <code>postgres()</code></li> <li>Athena <code>athena()</code></li> <li>Snowflake <code>snowflake()</code></li> <li>Google Big Query <code>gbq()</code></li> <li>Oracle <code>oracle()</code></li> <li>Databricks <code>databricks()</code></li> </ul> <p>Change the backend using <code>set_sql_mode()</code> - for example  - <code>set_sql_mode(databricks())</code></p> <p></p> <p></p>"},{"location":"#installation","title":"Installation","text":"<p>For the stable version:</p> <pre><code>] add TidierDB\n</code></pre> <p>TidierDB.jl currently supports the following top-level macros:</p> <ul> <li><code>@arrange</code></li> <li><code>@group_by</code></li> <li><code>@filter</code></li> <li><code>@select</code></li> <li><code>@mutate</code>, which supports <code>across()</code></li> <li><code>@summarize</code> and <code>@summarise</code>, which supports <code>across()</code></li> <li><code>@distinct</code></li> <li><code>@left_join</code>, <code>@right_join</code>, <code>@inner_join</code>, <code>@anti_join</code>, <code>@full_join</code>, and <code>@semi_join</code> (slight syntax differences from 
TidierData.jl)</li> <li><code>@count</code></li> <li><code>@slice_min</code>, <code>@slice_max</code>, <code>@slice_sample</code></li> <li><code>@window_order</code> and <code>@window_frame</code></li> <li><code>@show_query</code></li> <li><code>@collect</code></li> </ul> <p>Supported helper functions for most backends include:</p> <ul> <li><code>across()</code></li> <li><code>desc()</code></li> <li><code>if_else()</code> and <code>case_when()</code></li> <li><code>n()</code></li> <li><code>starts_with()</code>, <code>ends_with()</code>, and <code>contains()</code></li> <li><code>as_float()</code>, <code>as_integer()</code>, and <code>as_string()</code></li> <li><code>is_missing()</code></li> <li><code>missing_if()</code> and <code>replace_missing()</code></li> </ul> <p>From TidierStrings.jl:</p> <ul> <li><code>str_detect</code>, <code>str_replace</code>, <code>str_replace_all</code>, <code>str_remove_all</code>, <code>str_remove</code></li> </ul> <p>From TidierDates.jl:</p> <ul> <li><code>year</code>, <code>month</code>, <code>day</code>, <code>hour</code>, <code>min</code>, <code>second</code>, <code>floor_date</code>, <code>difftime</code></li> </ul> <p>Supported aggregate functions (as supported by the backend) with more to come</p> <ul> <li><code>mean</code>, <code>minimum</code>, <code>maximum</code>, <code>std</code>, <code>sum</code>, <code>cumsum</code>, <code>cor</code>, <code>cov</code>, <code>var</code></li> <li><code>@summarize</code> supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work.</li> </ul> <p>When using the DuckDB backend, if <code>db_table</code> receives a file path ( <code>.parquet</code>, <code>.json</code>, <code>.csv</code>, <code>iceberg</code> or <code>delta</code>), it does not copy it into memory. This allows for queries on files too big for memory. 
<code>db_table</code> also supports S3 bucket locations via DuckDB.</p> <p></p> <p></p>"},{"location":"#what-is-the-recommended-way-to-use-tidierdb","title":"What is the recommended way to use TidierDB?","text":"<p>Typically, you will want to use TidierDB alongside TidierData because there is certain functionality (such as pivoting) which is only supported in TidierData and can only be performed on data frames.</p> <p>Our recommended path for using TidierDB is to import the package so that there are no namespace conflicts with TidierData. Once TidierDB is integrated with Tidier, then Tidier will automatically load the packages in this fashion.</p> <p>First, let's develop and execute a query using TidierDB. Notice that all top-level macros and functions originating from TidierDB start with a <code>DB</code> prefix. Any functions defined within macros do not need to be prefixed with <code>DB</code> because they are actually pseudofunctions that are converted into SQL code.</p> <p>Even though the code reads similarly to TidierData, note that no computational work actually occurs until you run <code>DB.@collect()</code>, which runs the SQL query and instantiates the result as a DataFrame.</p> <pre><code>using TidierData\nimport TidierDB as DB\n\ndb = DB.connect(DB.duckdb());\npath_or_name = \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\"\n\n@chain DB.db_table(db, path_or_name) begin\n    DB.@filter(!starts_with(model, \"M\"))\n    DB.@group_by(cyl)\n    DB.@summarize(mpg = mean(mpg))\n    DB.@mutate(mpg_squared = mpg^2, \n               mpg_rounded = round(mpg), \n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))            \n    DB.@filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    
DB.@arrange(desc(mpg_rounded))\n    DB.@collect\nend\n</code></pre> <pre><code>2\u00d75 DataFrame\n Row \u2502 cyl     mpg       mpg_squared  mpg_rounded  mpg_efficiency \n     \u2502 Int64?  Float64?  Float64?     Float64?     String?        \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      4   27.3444      747.719         27.0  efficient\n   2 \u2502      6   19.7333      389.404         20.0  moderate\n</code></pre> <p></p> <p></p>"},{"location":"#what-if-we-wanted-to-pivot-the-result","title":"What if we wanted to pivot the result?","text":"<p>We cannot do this using TidierDB. However, we can call <code>@pivot_longer()</code> from TidierData after the result of the query has been instantiated as a DataFrame, like this: </p> <pre><code>@chain DB.db_table(db, path_or_name) begin\n    DB.@filter(!starts_with(model, \"M\"))\n    DB.@group_by(cyl)\n    DB.@summarize(mpg = mean(mpg))\n    DB.@mutate(mpg_squared = mpg^2, \n               mpg_rounded = round(mpg), \n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))            \n    DB.@filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    DB.@arrange(desc(mpg_rounded))\n    DB.@collect\n    @pivot_longer(everything(), names_to = \"variable\", values_to = \"value\")\nend\n</code></pre> <pre><code>10\u00d72 DataFrame\n Row \u2502 variable        value     \n     \u2502 String          Any       
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 cyl             4\n   2 \u2502 cyl             6\n   3 \u2502 mpg             27.3444\n   4 \u2502 mpg             19.7333\n   5 \u2502 mpg_squared     747.719\n   6 \u2502 mpg_squared     389.404\n   7 \u2502 mpg_rounded     27.0\n   8 \u2502 mpg_rounded     20.0\n   9 \u2502 mpg_efficiency  efficient\n  10 \u2502 mpg_efficiency  moderate\n</code></pre> <p></p> <p></p>"},{"location":"#what-sql-query-does-tidierdb-generate-for-a-given-piece-of-julia-code","title":"What SQL query does TidierDB generate for a given piece of Julia code?","text":"<p>We can replace <code>DB.@collect</code> with <code>DB.@show_query</code> to reveal the underlying SQL query being generated by TidierDB. To handle complex queries, TidierDB makes heavy use of Common Table Expressions (CTE), which are a useful tool to organize long queries.</p> <pre><code>@chain DB.db_table(db, path_or_name) begin\n    DB.@filter(!starts_with(model, \"M\"))\n    DB.@group_by(cyl)\n    DB.@summarize(mpg = mean(mpg))\n    DB.@mutate(mpg_squared = mpg^2, \n               mpg_rounded = round(mpg), \n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))            \n    DB.@filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    DB.@arrange(desc(mpg_rounded))\n    DB.@show_query\nend\n</code></pre> <pre><code>WITH cte_1 AS (\nSELECT *\n        FROM mtcars\n        WHERE NOT (starts_with(model, 'M'))),\ncte_2 AS (\nSELECT cyl, AVG(mpg) AS mpg\n        FROM cte_1\n        GROUP BY cyl),\ncte_3 AS (\nSELECT  cyl, mpg, POWER(mpg, 2) AS mpg_squared, ROUND(mpg) AS mpg_rounded, CASE WHEN mpg &gt;= POWER(cyl, 2) THEN 'efficient' WHEN 
mpg &lt; 15.2 THEN 'inefficient' ELSE 'moderate' END AS mpg_efficiency\n        FROM cte_2 ),\ncte_4 AS (\nSELECT *\n        FROM cte_3\n        WHERE mpg_efficiency in ('moderate', 'efficient'))  \nSELECT *\n        FROM cte_4  \n        ORDER BY mpg_rounded DESC\n</code></pre> <p></p> <p></p>"},{"location":"#tidierdb-is-already-quite-fully-featured-supporting-advanced-tidierdata-functions-like-across-for-multi-column-selection","title":"TidierDB is already quite fully-featured, supporting advanced TidierData functions like <code>across()</code> for multi-column selection.","text":"<pre><code>@chain DB.db_table(db, path_or_name) begin\n    DB.@group_by(cyl)\n    DB.@summarize(across((starts_with(\"a\"), ends_with(\"s\")), (mean, sum)))\n    DB.@collect\nend\n</code></pre> <pre><code>3\u00d75 DataFrame\n Row \u2502 cyl     mean_am   mean_vs   sum_am   sum_vs  \n     \u2502 Int64?  Float64?  Float64?  Int128?  Int128? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      4  0.727273  0.909091        8       10\n   2 \u2502      6  0.428571  0.571429        3        4\n   3 \u2502      8  0.142857  0.0             2        0\n</code></pre> <p>Bang bang <code>!!</code> interpolation for columns and values is also supported.</p> <p>There are a few subtle but important differences from Tidier.jl outlined here.</p> <p></p> <p></p>"},{"location":"#missing-a-function-or-backend","title":"Missing a function or backend?","text":"<p>You can use any existing SQL function within <code>@mutate</code> with the correct SQL syntax and it should just work.</p> <p>But if you run into problems please open an issue, and we will be happy to take a 
look!</p>"},{"location":"reference/","title":"Reference","text":""},{"location":"reference/#index","title":"Index","text":"<ul> <li><code>TidierDB.connect</code></li> <li><code>TidierDB.copy_to</code></li> <li><code>TidierDB.db_table</code></li> <li><code>TidierDB.@anti_join</code></li> <li><code>TidierDB.@arrange</code></li> <li><code>TidierDB.@collect</code></li> <li><code>TidierDB.@count</code></li> <li><code>TidierDB.@distinct</code></li> <li><code>TidierDB.@filter</code></li> <li><code>TidierDB.@full_join</code></li> <li><code>TidierDB.@group_by</code></li> <li><code>TidierDB.@head</code></li> <li><code>TidierDB.@inner_join</code></li> <li><code>TidierDB.@interpolate</code></li> <li><code>TidierDB.@left_join</code></li> <li><code>TidierDB.@mutate</code></li> <li><code>TidierDB.@rename</code></li> <li><code>TidierDB.@right_join</code></li> <li><code>TidierDB.@select</code></li> <li><code>TidierDB.@semi_join</code></li> <li><code>TidierDB.@slice_max</code></li> <li><code>TidierDB.@slice_min</code></li> <li><code>TidierDB.@slice_sample</code></li> <li><code>TidierDB.@summarise</code></li> <li><code>TidierDB.@summarize</code></li> <li><code>TidierDB.@window_frame</code></li> <li><code>TidierDB.@window_order</code></li> </ul>"},{"location":"reference/#reference-exported-functions","title":"Reference - Exported functions","text":"<p># <code>TidierDB.connect</code> \u2014 Method.</p> <pre><code>connect(backend; kwargs...)\n</code></pre> <p>This function establishes a database connection based on the specified backend and connection parameters and sets the SQL mode</p> <p>Arguments</p> <ul> <li> <p><code>backend</code>: type specifying the database backend to connect to. 
Supported backends are:</p> <ul> <li><code>duckdb()</code>, <code>sqlite()</code>(SQLite), <code>mssql()</code>, <code>mysql()</code>(for MariaDB and MySQL), <code>clickhouse()</code>, <code>postgres()</code></li> <li> <p><code>kwargs</code>: Keyword arguments specifying the connection parameters for the selected backend. The required parameters vary depending on the backend:</p> </li> <li> <p>MySQL:</p> <ul> <li><code>host</code>: The host name or IP address of the MySQL server. Default is \"localhost\".</li> <li><code>user</code>: The username for authentication. Default is an empty string.</li> <li><code>password</code>: The password for authentication.</li> <li><code>db</code>: The name of the database to connect to (optional).</li> <li><code>port</code>: The port number of the MySQL server (optional).</li> </ul> </li> </ul> </li> </ul> <p>Returns</p> <ul> <li>A database connection object based on the selected backend.</li> </ul> <p>Examples</p> <pre><code># Connect to MySQL\n# conn = connect(mysql(); host=\"localhost\", user=\"root\", password=\"password\", db=\"mydb\")\n# Connect to PostgreSQL using LibPQ\n# conn = connect(postgres(); host=\"localhost\", dbname=\"mydb\", user=\"postgres\", password=\"password\")\n# Connect to ClickHouse\n# conn = connect(clickhouse(); host=\"localhost\", port=9000, database=\"mydb\", user=\"default\", password=\"\")\n# Connect to SQLite\n# conn = connect(sqlite())\n# Connect to Google Big Query\n# conn = connect(gbq(), \"json_user_key_path\", \"project_id\")\n# Connect to Snowflake\n# conn = connect(snowflake(), \"ac_id\", \"token\", \"Database_name\", \"Schema_name\", \"warehouse_name\")\n# Connect to DuckDB\n# connect to Google Cloud via DuckDB\n# google_db = connect(duckdb(), :gbq, access_key=\"string\", secret_key=\"string\")\n# Connect to AWS via DuckDB\n# aws_db = connect2(duckdb(), :aws, aws_access_key_id=get(ENV, \"AWS_ACCESS_KEY_ID\", \"access_key\"), aws_secret_access_key=get(ENV, \"AWS_SECRET_ACCESS_KEY\", 
\"secret_access key\"), aws_region=get(ENV, \"AWS_DEFAULT_REGION\", \"us-east-1\"))\n# Connect to MotherDuck\n# connect(duckdb(), \"token\") for first connection, vs connect(duckdb(), \"md:\") for reconnection\njulia&gt; db = connect(duckdb())\nDuckDB.Connection(\":memory:\")\n</code></pre> <p>source</p> <p># <code>TidierDB.copy_to</code> \u2014 Method.</p> <pre><code>   copy_to(conn, df_or_path, \"name\")\n</code></pre> <p>Allows user to copy a df to the database connection. Currently supports DuckDB, SQLite, MySql</p> <p>Arguments</p> <p>-<code>conn</code>: the database connection -<code>df</code>: dataframe to be copied or path to serve as source. With DuckDB, path supports .csv, .json, .parquet to be used without copying intermediary df. -<code>name</code>: name as string for the database to be used</p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"test\");\n</code></pre> <p>source</p> <p># <code>TidierDB.db_table</code> \u2014 Function.</p> <pre><code>db_table(database, table_name, athena_params, delta = false, iceberg = false)\n</code></pre> <p><code>db_table</code> starts the underlying SQL query struct, adding the metadata and table. If paths are passed directly to db_table instead of a  name it will not copy it to memory, but rather ready directly from the file.</p> <p>Arguments</p> <ul> <li><code>database</code>: The Database or connection object</li> <li> <p><code>table_name</code>: tablename as a string (name, local path, or URL).     
- CSV/TSV       - Parquet     - Json      - Iceberg     - Delta     - S3 tables from AWS or Google Cloud </p> <ul> <li>DuckDB and ClickHouse support vectors of paths and URLs.</li> <li>DuckDB and ClickHouse also support use of <code>*</code> wildcards to read all files of a type in a location such as:</li> <li><code>db_table(db, \"Path/to/testing_files/*.parquet\")</code></li> <li><code>delta</code>: must be true to read delta files</li> <li><code>iceberg</code>: must be true to read iceberg files</li> </ul> </li> </ul> <p>Example</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; db_table(db, \"df_mem\")\nTidierDB.SQLQuery(\"\", \"df_mem\", \"\", \"\", \"\", \"\", \"\", \"\", false, false, 4\u00d74 DataFrame\n Row \u2502 name     type     current_selxn  table_name \n     \u2502 String?  String?  
Int64          String     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 id       VARCHAR              1  df_mem\n   2 \u2502 groups   VARCHAR              1  df_mem\n   3 \u2502 value    BIGINT               1  df_mem\n   4 \u2502 percent  DOUBLE               1  df_mem, false, DuckDB.Connection(\":memory:\"), TidierDB.CTE[], 0, nothing)\n</code></pre> <p>source</p> <p># <code>TidierDB.@anti_join</code> \u2014 Macro.</p> <pre><code>@anti_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform an anti join between two SQL queries based on a specified condition.  This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferrable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n        @anti_join(df_join, id2, id)\n        @collect\n       end\n5\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AB       aa            2       0.2\n   2 \u2502 AD       aa            4       0.4\n   3 \u2502 AF       aa            1       0.6\n   4 \u2502 AH       aa            3       0.8\n   5 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@arrange</code> \u2014 Macro.</p> <pre><code>@arrange(sql_query, columns...)\n</code></pre> <p>Order SQL table rows based on specified column(s).</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>columns</code>: Columns to order the rows by. Can include multiple columns for nested sorting. Wrap column name with <code>desc()</code> for descending order.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @arrange(value, desc(percent))\n         @collect\n       end\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AF       aa            1       0.6\n   2 \u2502 AA       bb            1       0.1\n   3 \u2502 AG       bb            2       0.7\n   4 \u2502 AB       aa            2       0.2\n   5 \u2502 AH       aa            3       0.8\n   6 \u2502 AC       bb            3       0.3\n   7 \u2502 AI       bb            4       0.9\n   8 \u2502 AD       aa            4       0.4\n   9 \u2502 AJ       aa            5       1.0\n  10 \u2502 AE       bb            5       0.5\n</code></pre> <p>source</p> <p># <code>TidierDB.@collect</code> \u2014 Macro.</p> <pre><code>@collect(sql_query, stream = false)\n</code></pre> <p><code>db_table</code> starts the underlying SQL query struct, adding the metadata and table. </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>stream</code>: optional streaming for query/execution of results when using duck db. Defaults to false</li> </ul> <p>Example</p> <pre><code>julia&gt; db = connect(duckdb());\n\njulia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @collect db_table(db, \"df_mem\")\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AB       aa            2       0.2\n   3 \u2502 AC       bb            3       0.3\n   4 \u2502 AD       aa            4       0.4\n   5 \u2502 AE       bb            5       0.5\n   6 \u2502 AF       aa            1       0.6\n   7 \u2502 AG       bb            2       0.7\n   8 \u2502 AH       aa            3       0.8\n   9 \u2502 AI       bb            4       0.9\n  10 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@count</code> \u2014 Macro.</p> <pre><code>@count(sql_query, columns...)\n</code></pre> <p>Count the number of rows grouped by specified column(s).</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>columns</code>: Columns to group by before counting. If no columns are specified, counts all rows in the query.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @count(groups)\n         @arrange(groups)\n         @collect\n       end\n2\u00d72 DataFrame\n Row \u2502 groups   count  \n     \u2502 String?  Int64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            5\n   2 \u2502 bb            5\n</code></pre> <p>source</p> <p># <code>TidierDB.@distinct</code> \u2014 Macro.</p> <pre><code>@distinct(sql_query, columns...)\n</code></pre> <p>Select distinct rows based on specified column(s). Distinct works differently in TidierData vs SQL and therefore TidierDB. Distinct will also select only the only columns it is given (or all if given none)</p> <p>Arguments</p> <p><code>sql_query</code>: The SQL query to operate on. <code>columns</code>: Columns to determine uniqueness. If no columns are specified, all columns are used to identify distinct rows.</p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @distinct(value)\n         @arrange(value)\n         @collect\n       end\n5\u00d71 DataFrame\n Row \u2502 value  \n     \u2502 Int64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      1\n   2 \u2502      2\n   3 \u2502      3\n   4 \u2502      4\n   5 \u2502      5\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @distinct\n         @arrange(id)\n         @collect\n       end\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AB       aa            2       0.2\n   3 \u2502 AC       bb            3       0.3\n   4 \u2502 AD       aa            4       0.4\n   5 \u2502 AE       bb            5       0.5\n   6 \u2502 AF       aa            1       0.6\n   7 \u2502 AG       bb            2       0.7\n   8 \u2502 AH       aa            3       0.8\n   9 \u2502 AI       bb            4       0.9\n  10 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@filter</code> \u2014 Macro.</p> <pre><code>@filter(sql_query, conditions...)\n</code></pre> <p>Filter rows in a SQL table based on specified conditions.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to filter rows from.</li> <li> <p><code>conditions</code>: Expressions specifying the conditions that rows must satisfy to be included in the output.                   Rows for which the expression evaluates to <code>true</code> will be included in the result.                   Multiple conditions can be combined using logical operators (<code>&amp;&amp;</code>, <code>||</code>). It will automatically                   detect whether the conditions belong in WHERE vs HAVING. 
</p> <pre><code>             Temporarily, it is best to use begin and end when filtering multiple conditions. (ex 2 below)\n</code></pre> </li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @filter(percent &gt; .5)\n         @collect\n       end\n5\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AF       aa            1       0.6\n   2 \u2502 AG       bb            2       0.7\n   3 \u2502 AH       aa            3       0.8\n   4 \u2502 AI       bb            4       0.9\n   5 \u2502 AJ       aa            5       1.0\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(mean = mean(percent))\n         @filter begin \n           groups == \"bb\" || # logical operators can still be used like this\n           mean &gt; .5\n         end\n         @arrange(groups)\n         @collect\n       end\n2\u00d72 DataFrame\n Row \u2502 groups   mean     \n     \u2502 String?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            0.6\n   2 \u2502 bb            0.5\n</code></pre> <p>source</p> <p># <code>TidierDB.@full_join</code> \u2014 Macro.</p> <pre><code>@full_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform a full join between two SQL queries based on a specified condition.  This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @full_join(df_join, id2, id)\n         @collect\n       end\n12\u00d77 DataFrame\n Row \u2502 id       groups   value    percent    id2      category  score   \n     \u2502 String?  String?  Int64?   Float64?   String?  String?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb             1        0.1  AA       X              88\n   2 \u2502 AC       bb             3        0.3  AC       Y              92\n   3 \u2502 AE       bb             5        0.5  AE       X              77\n   4 \u2502 AG       bb             2        0.7  AG       Y              83\n   5 \u2502 AI       bb             4        0.9  AI       X              95\n   6 \u2502 AB       aa             2        0.2  missing  missing   missing \n   7 \u2502 AD       aa             4        0.4  missing  missing   missing \n   8 \u2502 AF       aa             1        0.6  missing  missing   missing \n   9 \u2502 AH       aa             3        0.8  missing  missing   missing \n  10 \u2502 AJ       aa             5        1.0  missing  missing   missing \n  11 \u2502 missing  missing  missing  missing    AK       Y              68\n  12 \u2502 missing  missing  missing  missing    AM       X              74\n</code></pre> <p>source</p> <p># <code>TidierDB.@group_by</code> \u2014 Macro.</p> <pre><code>@group_by(sql_query, columns...)\n</code></pre> <p>Group SQL table rows by specified column(s). If grouping is performed as a terminal operation without a subsequent mutatation or summarization (as in the example below), then the resulting data frame will be ungrouped when <code>@collect</code> is applied.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions specifying the columns to group by. 
Columns can be specified by name.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @arrange(groups)\n         @collect\n       end\n2\u00d71 DataFrame\n Row \u2502 groups  \n     \u2502 String? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa\n   2 \u2502 bb\n</code></pre> <p>source</p> <p># <code>TidierDB.@head</code> \u2014 Macro.</p> <pre><code>@head(sql_query, value)\n</code></pre> <p>Limit SQL table number of rows returned based on specified value.  <code>LIMIT</code> in SQL</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>value</code>: Number to limit how many rows are returned.</li> </ul> <p>Examples</p> <pre><code>julia&gt; db = connect(duckdb());\n\njulia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; copy_to(db, df, \"df_mem\");                     \n\njulia&gt; @chain db_table(db, :df_mem) begin\n        @head(1) ## supports expressions ie `3-2` would return the same df below\n        @collect\n       end\n1\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n</code></pre> <p>source</p> <p># <code>TidierDB.@inner_join</code> \u2014 Macro.</p> <pre><code>@inner_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform an inner join between two SQL queries based on a specified condition.  This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @inner_join(df_join, id2, id)\n         @collect\n       end\n5\u00d77 DataFrame\n Row \u2502 id       groups   value   percent   id2      category  score  \n     \u2502 String?  String?  Int64?  Float64?  String?  String?   Int64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1  AA       X             88\n   2 \u2502 AC       bb            3       0.3  AC       Y             92\n   3 \u2502 AE       bb            5       0.5  AE       X             77\n   4 \u2502 AG       bb            2       0.7  AG       Y             83\n   5 \u2502 AI       bb            4       0.9  AI       X             95\n</code></pre> <p>source</p> <p># <code>TidierDB.@interpolate</code> \u2014 Macro.</p> <pre><code>@interpolate(args...)\n</code></pre> <p>Interpolate parameters into expressions for database queries.</p> <p>Arguments</p> <ul> <li> <p><code>args...</code>: A variable number of tuples. 
Each tuple should contain:</p> <ul> <li><code>name</code>: The name of the parameter to interpolate.</li> <li><code>value</code>: (Any): The value/vector to interpolate for the corresponding parameter name.</li> </ul> </li> </ul> <p>Example</p> <pre><code>julia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; col_names = [:id, :value, :percent];\n\njulia&gt; cond1 = .2;\n\njulia&gt; cond2 = 5;\n\njulia&gt; @interpolate((condition1, cond1), (columns, col_names), (condition2, cond2));\n\njulia&gt; @chain db_table(db, \"df_mem\") begin \n          @select(!!columns)\n          @filter begin \n              percent &lt; !!condition1\n              value &lt; !!condition2\n          end\n          @collect\n          end\n1\u00d73 DataFrame\n Row \u2502 id       value   percent  \n     \u2502 String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA            1       0.1\n</code></pre> <p>source</p> <p># <code>TidierDB.@left_join</code> \u2014 Macro.</p> <pre><code>@left_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform a left join between two SQL queries based on a specified condition.  
This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @left_join(df_join, id2, id)\n         @collect\n       end\n10\u00d77 DataFrame\n Row \u2502 id       groups   value   percent   id2      category  score   \n     \u2502 String?  String?  Int64?  Float64?  String?  String?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1  AA       X              88\n   2 \u2502 AC       bb            3       0.3  AC       Y              92\n   3 \u2502 AE       bb            5       0.5  AE       X              77\n   4 \u2502 AG       bb            2       0.7  AG       Y              83\n   5 \u2502 AI       bb            4       0.9  AI       X              95\n   6 \u2502 AB       aa            2       0.2  missing  missing   missing \n   7 \u2502 AD       aa            4       0.4  missing  missing   missing \n   8 \u2502 AF       aa            1       0.6  missing  missing   missing \n   9 \u2502 AH       aa            3       0.8  missing  missing   missing \n  10 \u2502 AJ       aa            5       1.0  missing  missing   missing \n</code></pre> <p>source</p> <p># <code>TidierDB.@mutate</code> \u2014 Macro.</p> <pre><code>@mutate(sql_query, exprs...)\n</code></pre> <p>Mutate SQL table rows by adding new columns or modifying existing ones.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions for mutating the table. New columns can be added or existing columns modified using column_name = expression syntax, where expression can involve existing columns.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @mutate(value = value * 4, new_col = percent^2)\n         @collect\n       end\n10\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   new_col  \n     \u2502 String?  String?  Int64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            4       0.1      0.01\n   2 \u2502 AB       aa            8       0.2      0.04\n   3 \u2502 AC       bb           12       0.3      0.09\n   4 \u2502 AD       aa           16       0.4      0.16\n   5 \u2502 AE       bb           20       0.5      0.25\n   6 \u2502 AF       aa            4       0.6      0.36\n   7 \u2502 AG       bb            8       0.7      0.49\n   8 \u2502 AH       aa           12       0.8      0.64\n   9 \u2502 AI       bb           16       0.9      0.81\n  10 \u2502 AJ       aa           20       1.0      1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@rename</code> \u2014 Macro.</p> <pre><code>@rename(sql_query, renamings...)\n</code></pre> <p>Rename one or more columns in a SQL query.</p> <p>Arguments</p> <p>-<code>sql_query</code>: The SQL query to operate on. -<code>renamings</code>: One or more pairs of old and new column names, specified as new name = old name </p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n       @rename(new_name = percent)\n       @collect\n       end\n10\u00d74 DataFrame\n Row \u2502 id       groups   value   new_name \n     \u2502 String?  String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AB       aa            2       0.2\n   3 \u2502 AC       bb            3       0.3\n   4 \u2502 AD       aa            4       0.4\n   5 \u2502 AE       bb            5       0.5\n   6 \u2502 AF       aa            1       0.6\n   7 \u2502 AG       bb            2       0.7\n   8 \u2502 AH       aa            3       0.8\n   9 \u2502 AI       bb            4       0.9\n  10 \u2502 AJ       aa            5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@right_join</code> \u2014 Macro.</p> <pre><code>@right_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform a right join between two SQL queries based on a specified condition.  
This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @right_join(df_join, id2, id)\n         @collect\n       end\n7\u00d77 DataFrame\n Row \u2502 id       groups   value    percent    id2      category  score  \n     \u2502 String?  String?  Int64?   Float64?   String?  String?   Int64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb             1        0.1  AA       X             88\n   2 \u2502 AC       bb             3        0.3  AC       Y             92\n   3 \u2502 AE       bb             5        0.5  AE       X             77\n   4 \u2502 AG       bb             2        0.7  AG       Y             83\n   5 \u2502 AI       bb             4        0.9  AI       X             95\n   6 \u2502 missing  missing  missing  missing    AK       Y             68\n   7 \u2502 missing  missing  missing  missing    AM       X             74\n</code></pre> <p>source</p> <p># <code>TidierDB.@select</code> \u2014 Macro.</p> <pre><code>@select(sql_query, columns)\n</code></pre> <p>Select specified columns from a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to select columns from.</li> <li><code>columns</code>: Expressions specifying the columns to select. Columns can be specified by name,                and new columns can be created with expressions using existing column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @select(groups:percent)\n         @collect\n       end\n10\u00d73 DataFrame\n Row \u2502 groups   value   percent  \n     \u2502 String?  Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 bb            1       0.1\n   2 \u2502 aa            2       0.2\n   3 \u2502 bb            3       0.3\n   4 \u2502 aa            4       0.4\n   5 \u2502 bb            5       0.5\n   6 \u2502 aa            1       0.6\n   7 \u2502 bb            2       0.7\n   8 \u2502 aa            3       0.8\n   9 \u2502 bb            4       0.9\n  10 \u2502 aa            5       1.0\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @select(contains(\"e\"))\n         @collect\n       end\n10\u00d72 DataFrame\n Row \u2502 value   percent  \n     \u2502 Int64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      1       0.1\n   2 \u2502      2       0.2\n   3 \u2502      3       0.3\n   4 \u2502      4       0.4\n   5 \u2502      5       0.5\n   6 \u2502      1       0.6\n   7 \u2502      2       0.7\n   8 \u2502      3       0.8\n   9 \u2502      4       0.9\n  10 \u2502      5       1.0\n</code></pre> <p>source</p> <p># <code>TidierDB.@semi_join</code> \u2014 Macro.</p> <pre><code>@semi_join(sql_query, join_table, new_table_col, orignal_table_col)\n</code></pre> <p>Perform a semi join between two SQL queries based on a specified condition.  
This syntax here is slightly different than TidierData.jl, however, because  SQL does not drop the joining column, for the metadata storage, it is  preferable for the names to be different </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The primary SQL query to operate on.</li> <li><code>join_table</code>: The secondary SQL table to join with the primary query table.</li> <li><code>new_table_col</code>: Column from the new table that matches for join.</li> <li><code>orignal_table_col</code>: Column from the original table that matches for join.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; copy_to(db, df2, \"df_join\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @semi_join(df_join, id2, id)\n         @collect\n       end\n5\u00d74 DataFrame\n Row \u2502 id       groups   value   percent  \n     \u2502 String?  String?  Int64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1\n   2 \u2502 AC       bb            3       0.3\n   3 \u2502 AE       bb            5       0.5\n   4 \u2502 AG       bb            2       0.7\n   5 \u2502 AI       bb            4       0.9\n</code></pre> <p>source</p> <p># <code>TidierDB.@slice_max</code> \u2014 Macro.</p> <pre><code>@slice_max(sql_query, column, n = 1)\n</code></pre> <p>Select rows with the largest values in specified column. This will always return ties. </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>column</code>: Column to identify the largest values.</li> <li><code>n</code>: The number of rows to select with the largest values for each specified column. Default is 1, which selects the row with the largest value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @slice_max(value, n = 2)\n         @collect\n       end;\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @slice_max(value)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   rank_col \n     \u2502 String?  String?  Int64?  Float64?  Int64?   
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AE       bb            5       0.5         1\n   2 \u2502 AJ       aa            5       1.0         1\n</code></pre> <p>source</p> <p># <code>TidierDB.@slice_min</code> \u2014 Macro.</p> <pre><code>@slice_min(sql_query, column, n = 1)\n</code></pre> <p>Select rows with the smallest values in specified column. This will always return ties. </p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>column</code>: Column to identify the smallest values.</li> <li><code>n</code>: The number of rows to select with the smallest values for each specified column. Default is 1, which selects the row with the smallest value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @slice_min(value, n = 2)\n         @collect\n       end;\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @slice_min(value)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   rank_col \n     \u2502 String?  String?  Int64?  Float64?  Int64?   
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1         1\n   2 \u2502 AF       aa            1       0.6         1\n</code></pre> <p>source</p> <p># <code>TidierDB.@slice_sample</code> \u2014 Macro.</p> <pre><code>@slice_sample(sql_query, n)\n</code></pre> <p>Randomly select a specified number of rows from a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>n</code>: The number of rows to randomly select.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @slice_sample(n = 2)\n         @collect\n       end;\n\njulia&gt; @chain db_table(db, :df_mem) begin\n       @slice_sample()\n       @collect\n       end;\n</code></pre> <p>source</p> <p># <code>TidierDB.@summarise</code> \u2014 Macro.</p> <pre><code>   @summarise(sql_query, exprs...)\n</code></pre> <p>Aggregate and summarize specified columns of a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions defining the aggregation and summarization operations. 
These can specify simple aggregations like mean, sum, and count, or more complex expressions involving existing column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(across((value:percent), (mean, sum)))\n         @arrange(groups)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 groups   mean_value  mean_percent  sum_value  sum_percent \n     \u2502 String?  Float64?    Float64?      Int128?    Float64?    \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa              3.0           0.6         15          3.0\n   2 \u2502 bb              3.0           0.5         15          2.5\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(test = sum(percent), n = n())\n         @arrange(groups)\n         @collect\n       end\n2\u00d73 DataFrame\n Row \u2502 groups   test      n      \n     \u2502 String?  Float64?  Int64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            3.0       5\n   2 \u2502 bb            2.5       5\n</code></pre> <p>source</p> <p># <code>TidierDB.@summarize</code> \u2014 Macro.</p> <pre><code>   @summarize(sql_query, exprs...)\n</code></pre> <p>Aggregate and summarize specified columns of a SQL table.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>exprs</code>: Expressions defining the aggregation and summarization operations. These can specify simple aggregations like mean, sum, and count, or more complex expressions involving existing column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(across((ends_with(\"e\"), starts_with(\"p\")), (mean, sum)))\n         @arrange(groups)\n         @collect\n       end\n2\u00d75 DataFrame\n Row \u2502 groups   mean_value  mean_percent  sum_value  sum_percent \n     \u2502 String?  Float64?    Float64?      Int128?    Float64?    
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa              3.0           0.6         15          3.0\n   2 \u2502 bb              3.0           0.5         15          2.5\n\njulia&gt; @chain db_table(db, :df_mem) begin\n         @group_by(groups)\n         @summarise(test = sum(percent), n = n())\n         @arrange(groups)\n         @collect\n       end\n2\u00d73 DataFrame\n Row \u2502 groups   test      n      \n     \u2502 String?  Float64?  Int64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 aa            3.0       5\n   2 \u2502 bb            2.5       5\n</code></pre> <p>source</p> <p># <code>TidierDB.@window_frame</code> \u2014 Macro.</p> <pre><code>@window_frame(sql_query, frame_start::Int, frame_end::Int)\n</code></pre> <p>Define the window frame for window functions in a SQL query, specifying the range of rows to include in the calculation relative to the current row.</p> <p>Arguments</p> <p>sql_query: The SQL query to operate on, expected to be an instance of SQLQuery.</p> <ul> <li><code>frame_start</code>: The starting point of the window frame. A positive value indicates the start after the current row (FOLLOWING), a negative value indicates before the current row (PRECEDING), and 0 indicates the current row.</li> <li><code>frame_end</code>: The ending point of the window frame. 
A positive value indicates the end after the current row (FOLLOWING), a negative value indicates before the current row (PRECEDING), and 0 indicates the current row.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n</code></pre> <p>source</p> <p># <code>TidierDB.@window_order</code> \u2014 Macro.</p> <pre><code>   @window_order(sql_query, columns...)\n</code></pre> <p>Specify the order of rows for window functions within a SQL query.</p> <p>Arguments</p> <ul> <li><code>sql_query</code>: The SQL query to operate on.</li> <li><code>columns</code>: Columns to order the rows by for the window function. Can include multiple columns for nested sorting. Prepend a column name with - for descending order.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9], \n                        groups = [i % 2 == 0 ? 
\"aa\" : \"bb\" for i in 1:10], \n                        value = repeat(1:5, 2), \n                        percent = 0.1:0.1:1.0);\n\njulia&gt; db = connect(duckdb());\n\njulia&gt; copy_to(db, df, \"df_mem\");\n</code></pre> <p>source</p> <p></p> <p></p>"},{"location":"reference/#reference-internal-functions","title":"Reference - Internal functions","text":""},{"location":"examples/generated/UserGuide/Snowflake/","title":"Using Snowflake","text":"<p>Establishing a connection with the Snowflake SQL Rest API requires a OAuth token specific to the Role the user will use to query tables with.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/Snowflake/#connecting","title":"Connecting","text":"<p>Connection is established with the <code>connect</code> function as shown below. Connection requires 5 items as strings</p> <ul> <li>Account Identifier</li> <li>OAuth token</li> <li>Database Name</li> <li>Schema Name</li> <li>Compute Warehouse name</li> </ul> <p>Two things to note:</p> <ul> <li>Your OAuth Token may frequently expire, which may require you to rerun your connection line.</li> <li> <p>Since each time <code>db_table</code> runs, it runs a query to pull the metadata, you may choose to use run <code>db_table</code> and save the results, and use these results with<code>from_query()</code></p> <ul> <li>This will reduce the number of queries to your database</li> <li>Allow you to build a a SQL query and <code>@show_query</code> even if the OAuthtoken has expired. 
To <code>@collect</code>, you will have to reconnect and rerun <code>db_table</code> if your OAuth token has expired</li> </ul> </li> </ul> <pre><code>set_sql_mode(snowflake())\nac_id = \"string_id\"\ntoken = \"OAuth_token_string\"\ncon = connect(:snowflake, ac_id, token, \"DEMODB\", \"PUBLIC\", \"COMPUTE_WH\")\n# After connection is established, a you may begin querying.\nstable_table_metadata = db_table(con, \"MTCARS\")\n@chain from_query(stable_table_metadata) begin\n   @select(WT)\n   @mutate(TEST = WT *2)\n   #@aside @show_query _\n   @collect\nend\n</code></pre> <pre><code>32\u00d72 DataFrame\n Row \u2502 WT       TEST\n     \u2502 Float64  Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502   2.62     5.24\n   2 \u2502   2.875    5.75\n   3 \u2502   2.32     4.64\n   4 \u2502   3.215    6.43\n  \u22ee  \u2502    \u22ee        \u22ee\n  29 \u2502   3.17     6.34\n  30 \u2502   2.77     5.54\n  31 \u2502   3.57     7.14\n  32 \u2502   2.78     5.56\n         24 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/athena/","title":"Using Athena","text":"<p>This page covers the setup and a small syntax difference for using the AWS Athena backend with TidierDB.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/athena/#connecting","title":"Connecting","text":"<p>Connection is established through AWS.jl as shown below.</p> <pre><code>using TidierDB, AWS\nset_sql_mode(athena())\n# Replace your credentials as needed below\naws_access_key_id = get(ENV,\"AWS_ACCESS_KEY_ID\",\"key\")\naws_secret_access_key = get(ENV, \"AWS_SECRET_ACCESS_KEY\",\"secret_key\")\naws_region = get(ENV,\"AWS_DEFAULT_REGION\",\"region\")\n\nconst AWS_GLOBAL_CONFIG = Ref{AWS.AWSConfig}()\ncreds = AWSCredentials(aws_access_key_id, aws_secret_access_key)\n\nAWS_GLOBAL_CONFIG[] = AWS.global_aws_config(region=aws_region, 
creds=creds)\n\ncatalog = \"AwsDataCatalog\"\nworkgroup = \"primary\"\ndb = \"demodb\"\nall_results = true\nresults_per_increment = 10\nout_loc = \"s3://location/\"\n\nathena_params = Dict(\n    \"ResultConfiguration\" =&gt; Dict(\n        \"OutputLocation\" =&gt; out_loc\n    ),\n    \"QueryExecutionContext\" =&gt; Dict(\n        \"Database\" =&gt; db,\n        \"Catalog\" =&gt; catalog\n    ),\n    \"Workgroup\" =&gt; workgroup\n)\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/athena/#db_table-differences","title":"<code>db_table</code> differences","text":"<p>There are two differences for <code>db_table</code> which are seen in the query below</p> <ol> <li>The table needs to be passed as a string in the format database.table, i.e. <code>\"demodb.table_name\"</code></li> <li><code>db_table</code> requires a third argument: the athena_params from above.</li> </ol> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/athena/#leveraging-from_query-with-athena-to-reduce-number-of-queries","title":"Leveraging <code>from_query</code> with Athena to reduce number of queries","text":"<p>Throughout TidierDB, each time <code>db_table</code> is called, it queries the database to get the metadata. Considering how AWS Athena logs queries, a user may want to reduce the number of queries. 
This can be done by saving the results of <code>db_table</code>, and then using from_query with those results for further queries as shown below.</p> <pre><code>mtcars = db_table(AWS_GLOBAL_CONFIG[], \"demodb.mtcars\", athena_params)\n@chain from_query(mtcars) begin\n    @filter(cyl &gt; 4)\n    @group_by(cyl)\n    @summarize(mpg = mean(mpg))\n   #@show_query\n    @collect\nend\n</code></pre> <pre><code>2\u00d72 DataFrame\n Row \u2502 cyl    mpg\n     \u2502 Int64  Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6  19.7429\n   2 \u2502     8  15.1\n</code></pre> <p>I would like to acknowledge the work of Manu Francis and this blog post, which helped guide this process</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/databricks/","title":"Using Databricks","text":"<p>Establishing a connection with the Databricks SQL Rest API requires a token.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/databricks/#connecting","title":"Connecting","text":"<p>Connection is established with the <code>connect</code> function as shown below. Connection requires 5 items as strings</p> <ul> <li>Account Instance : how to find your instance</li> <li>OAuth token : how to generate your token</li> <li>Database Name</li> <li>Schema Name</li> <li>warehouse_id</li> </ul> <p>One thing to note: since each time <code>db_table</code> runs, it runs a query to pull the metadata, you may choose to run <code>db_table</code> and save the results, and use these results with <code>from_query()</code>. 
This will reduce the number of queries to your database and is illustrated below.</p> <pre><code>set_sql_mode(databricks())\ninstance_id = \"string_id\"\ntoken \"string_token\"\nwarehouse_id = \"e673cd4f387f964a\"\ncon = connect(:databricks, instance_id, token, \"DEMODB\", \"PUBLIC\", warehouse_id)\n# After connection is established, a you may begin querying.\nstable_table_metadata = db_table(con, \"mtcars\")\n@chain from_query(stable_table_metadata) begin\n   @select(wt)\n   @mutate(test = wt *2)\n   #@aside @show_query _\n   @collect\nend\n</code></pre> <pre><code>32\u00d72 DataFrame\n Row \u2502 wt       test\n     \u2502 Float64  Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502   2.62     5.24\n   2 \u2502   2.875    5.75\n   3 \u2502   2.32     4.64\n   4 \u2502   3.215    6.43\n  \u22ee  \u2502    \u22ee        \u22ee\n  29 \u2502   3.17     6.34\n  30 \u2502   2.77     5.54\n  31 \u2502   3.57     7.14\n  32 \u2502   2.78     5.56\n         24 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/from_queryex/","title":"Reusing Part of a Query","text":"<p>While using TidierDB, you may need to generate part of a query and reuse it multiple times. <code>from_query()</code> enables a query portion to be reused multiple times as shown below.</p> <pre><code>import TidierDB as DB\ncon = DB.connect(duckdb())\nmtcars_path = \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\"\n</code></pre> <p>Start a query to analyze fuel efficiency by number of cylinders. 
However, to further build on this query later, end the chain without using <code>@show_query</code> or <code>@collect</code></p> <pre><code>query = DB.@chain DB.db_table(con, mtcars_path) begin\n    DB.@group_by cyl\n    DB.@summarize begin\n        across(mpg, (mean, minimum, maximum))\n        num_cars = n()\n        end\n    DB.@mutate begin\n        efficiency = case_when(\n            mean_mpg &gt;= 25, \"High\",\n            mean_mpg &gt;= 15, \"Moderate\",\n            \"Low\" )\n       end\nend;\n</code></pre> <p>Now, <code>from_query</code> will allow you to reuse the query to calculate the average horsepower for each efficiency category</p> <pre><code>DB.@chain DB.from_query(query) begin\n   DB.@left_join(mtcars2, cyl, cyl)\n   DB.@group_by(efficiency)\n   DB.@summarize(avg_hp = mean(hp))\n   DB.@collect\nend\n</code></pre> <pre><code>2\u00d72 DataFrame\n Row \u2502 efficiency  avg_hp\n     \u2502 String?     Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Moderate    180.238\n   2 \u2502 High         82.6364\n</code></pre> <p>Reuse the query again to find the car with the highest MPG for each cylinder category</p> <pre><code>DB.@chain DB.from_query(query) begin\n   DB.@left_join(mtcars2, cyl, cyl)\n   DB.@group_by cyl\n   DB.@slice_max(mpg)\n   DB.@select model cyl mpg\n   DB.@collect\nend\n</code></pre> <pre><code>3\u00d73 DataFrame\n Row \u2502 model             cyl     mpg\n     \u2502 String?           Int64?  
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Pontiac Firebird       8      19.2\n   2 \u2502 Toyota Corolla         4      33.9\n   3 \u2502 Hornet 4 Drive         6      21.4\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/from_queryex/#preview-or-save-an-intermediate-table","title":"Preview or save an intermediate table","text":"<p>While querying a dataset, you may wish to see an intermediate table, or even save it. You can use <code>@aside</code> and <code>from_query(_)</code>, illustrated below, to do just that. While we opted to print the results in this simple example below, we could have saved them by using <code>name = DB.@chain...</code></p> <pre><code>import ClickHouse;\nconn = conn = DB.connect(DB.clickhouse(); host=\"localhost\", port=19000, database=\"default\", user=\"default\", password=\"\")\npath = \"https://huggingface.co/datasets/maharshipandya/spotify-tracks-dataset/resolve/refs%2Fconvert%2Fparquet/default/train/0000.parquet\"\nDB.@chain DB.db_table(conn, path) begin\n   DB.@count(cyl)\n   @aside println(DB.@chain DB.from_query(_) DB.@head(5) DB.@collect)\n   DB.@arrange(desc(count))\n   DB.@collect\nend\n</code></pre> <pre><code>5\u00d72 DataFrame\n Row \u2502 artists  count\n     \u2502 String?  UInt64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing       1\n   2 \u2502 Wizo          3\n   3 \u2502 MAGIC!        3\n   4 \u2502 Macaco        1\n   5 \u2502 SOYOU         1\n31438\u00d72 DataFrame\n   Row \u2502 artists          count\n       \u2502 String?          
UInt64\n\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n     1 \u2502 The Beatles         279\n     2 \u2502 George Jones        271\n     3 \u2502 Stevie Wonder       236\n     4 \u2502 Linkin Park         224\n     5 \u2502 Ella Fitzgerald     222\n     6 \u2502 Prateek Kuhad       217\n     7 \u2502 Feid                202\n   \u22ee   \u2502        \u22ee           \u22ee\n 31432 \u2502 Leonard               1\n 31433 \u2502 marcos g              1\n 31434 \u2502 BLVKSHP               1\n 31435 \u2502 Memtrix               1\n 31436 \u2502 SOYOU                 1\n 31437 \u2502 Macaco                1\n 31438 \u2502 missing               1\n               31424 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/functions_pass_to_DB/","title":"Writing Functions/Macros with TidierDB Chains","text":"<p>How can functions pass arguments to a TidierDB chain?</p> <p>In short, you have to use a macro instead in conjuction with <code>@interpolate</code></p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/functions_pass_to_DB/#setting-up-the-macro","title":"Setting up the macro","text":"<p>To write a macro that will take arguments and pass them to a TidierDB chain, there are 3 steps:</p> <ol> <li>Write macro with the desired argument(s), and, after the quote, add the chain. Arguments to be changed/interpolated must be prefixed with <code>!!</code></li> <li>Use <code>@interpolate</code> to make these arguemnts accessible to the chain. 
<code>@interpolate</code> takes touples as argument (one for the <code>!!</code>name, and one for the actual content you want the chain to use)</li> <li>Run <code>@interpolate</code> and then the chain macro sequentially</li> </ol> <pre><code>using TidierDB\ndb = connect(duckdb())\npath = \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\"\ncopy_to(db, path, \"mtcars\");\n\n# STEP 1\nmacro f1(conditions, columns) # The arguemnt names will be names of the `!!` values\n    return quote\n    # add chain here\n      @chain db_table(db, :mtcars) begin\n           @filter(!!conditions &gt; 3)\n           @select(!!columns)\n           @aside @show_query _\n           @collect\n         end # ends the chain\n    end # ends the quote.\nend # ends the macro\n</code></pre> <pre><code># STEP 2\nvariable = :gear;\ncols = [:model, :mpg, :gear, :wt];\n@interpolate((conditions, variable), (columns, cols));\n@f1(variable, cols)\n</code></pre> <pre><code>17\u00d74 DataFrame\n Row \u2502 model           mpg       gear    wt\n     \u2502 String?         Float64?  Int32?  
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4           21.0       4     2.62\n   2 \u2502 Mazda RX4 Wag       21.0       4     2.875\n   3 \u2502 Datsun 710          22.8       4     2.32\n  \u22ee  \u2502       \u22ee            \u22ee        \u22ee        \u22ee\n  15 \u2502 Ferrari Dino        19.7       5     2.77\n  16 \u2502 Maserati Bora       15.0       5     3.57\n  17 \u2502 Volvo 142E          21.4       4     2.78\n                                   11 rows omitted\n</code></pre> <p>Lets say you wanted to filter on new variable with a different name and select new columns,</p> <pre><code>new_condition = :wt;\nnew_cols = [:model, :drat]\n@interpolate((conditions, new_condition), (columns, new_cols));\n@f1(new_condition, new_cols)\n</code></pre> <pre><code>20\u00d72 DataFrame\n Row \u2502 model              drat\n     \u2502 String?            
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Hornet 4 Drive         3.08\n   2 \u2502 Hornet Sportabout      3.15\n   3 \u2502 Valiant                2.76\n  \u22ee  \u2502         \u22ee             \u22ee\n  18 \u2502 Pontiac Firebird       3.08\n  19 \u2502 Ford Pantera L         4.22\n  20 \u2502 Maserati Bora          3.54\n                    14 rows omitted\n</code></pre> <p>You can also interpolate vectors of strings into a <code>@filter(col in (values))</code> as well by using the following syntax <code>@filter(col in [!!values])</code></p> <p>In short, the first argument in <code>@interpolate</code> must be the name of the macro argument it refers to, and the second argument is what you would like to replace it.</p> <p>We recognize this adds friction and that it is not ideal, but given the TidierDB macro expressions/string interplay, this is currently the most graceful and functional option available and hopefully a temporary solution to better interpolation that mirrors TidierData.jl.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/getting_started/","title":"Getting Started","text":"<p>To use TidierDB.jl, you will have to set up a connection. TidierDB.jl gives you access to duckdb via <code>duckdb_open</code> and <code>duckdb_connect</code>. 
However, to use MySQL, ClickHouse, MSSQL, Postgres, or SQLite, you will have to load those packages in first.</p> <p>If you plan to use TidierDB.jl with TidierData.jl or Tidier.jl, it is most convenient to load the packages as follows:</p> <pre><code>using TidierData\nimport TidierDB as DB\n</code></pre> <p>Alternatively, <code>using Tidier</code> will import TidierDB in the above manner for you, where TidierDB functions and macros will be available as <code>DB.@mutate()</code> and so on, and the TidierData equivalent would be <code>@mutate()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#connecting","title":"Connecting","text":"<p>To connect to a database, you can use the <code>connect</code> function as shown below, or establish your own connection through the respective libraries.</p> <p>For example, connecting to MySQL</p> <pre><code>conn = DB.connect(DB.mysql(); host=\"localhost\", user=\"root\", password=\"password\", db=\"mydb\")\n</code></pre> <p>versus connecting to DuckDB</p> <pre><code>conn = DB.connect(DB.duckdb())\n</code></pre> <p>You can also establish a connection through an alternate method that you prefer, and use that as your connection as well.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#package-extensions","title":"Package Extensions","text":"<p>The following backends utilize package extensions. 
To use one of the backends listed below, you will need to write <code>using Library</code></p> <ul> <li>ClickHouse: <code>import ClickHouse</code></li> <li>MySQL and MariaDB: <code>using MySQL</code></li> <li>MSSQL: <code>using ODBC</code></li> <li>Postgres: <code>using LibPQ</code></li> <li>SQLite: <code>using SQLite</code></li> <li>Athena: <code>using AWS</code></li> <li>Oracle: <code>using ODBC</code></li> <li>Google BigQuery: <code>using GoogleCloud</code></li> </ul> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#db_table","title":"<code>db_table</code>","text":"<p>What does <code>db_table</code> do?</p> <p><code>db_table</code> starts the underlying SQL query struct, in addition to pulling the table metadata and storing it there. Storing metadata is what enables a lazy interface that also supports tidy selection.</p> <ul> <li><code>db_table</code> has two required arguments: <code>connection</code> and <code>table</code></li> <li><code>table</code> can be a table name on a database or a path/url to a file to read. When passing <code>db_table</code> a path or url, the table is not copied into memory.</li> <li>With DuckDB and ClickHouse, if you have a folder of multiple files to read, you can use <code>*</code> to read in all files matching the pattern.</li> <li>For example, the below would read all files that end in <code>.csv</code> in the given folder.</li> </ul> <pre><code>db_table(db, \"folder/path/*.csv\")\n</code></pre> <p><code>db_table</code> also supports iceberg, delta, and S3 file paths via DuckDB.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/getting_started/#minimizing-compute-costs","title":"Minimizing Compute Costs","text":"<p>If you are working with a backend where compute cost is important, it will be important to minimize using <code>db_table</code> as this will requery for metadata each time. 
Compute costs are relevant to backends such as AWS, Databricks and Snowflake.</p> <p>To do this, save the results of <code>db_table</code> and use them with <code>from_query</code>. Using <code>from_query</code> pulls the relevant information (metadata, con, etc.) from the mutable SQLQuery struct, allowing you to repeatedly query and collect the table without requerying for the metadata each time.</p> <pre><code>table = DB.db_table(con, \"path\")\n@chain DB.from_query(table) begin\n    ## data wrangling here\nend\n</code></pre> <p>Tip: Setting <code>t(table) = from_query(table)</code> will save some keystrokes. This means after saving the results of <code>db_table</code> you can start all chains/refer to the data with <code>t(table)</code>.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/ibis_comp/","title":"TidierDB.jl vs Ibis","text":""},{"location":"examples/generated/UserGuide/ibis_comp/#comparing-tidierdb-vs-ibis","title":"Comparing TidierDB vs Ibis","text":"<p>TidierDB is a reimplementation of dbplyr from R, so the syntax is remarkably similar. But how does TidierDB compare to Python's Ibis? 
This page will perform a similar comparison to the Ibis Documentation comparing Ibis and dplyr</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#set-up","title":"Set up","text":"<p>Ibis</p> <pre><code>import ibis\nimport ibis.selectors as s # allows for different styles of column selection\nfrom ibis import _ # eliminates need to type table name before each column vs typing cols as strings\nibis.options.interactive = True # automatically collects first 10 rows of table\n</code></pre> <p>TidierDB</p> <pre><code>using TidierDB\ndb = connect(duckdb())\n# This next line is optional, but it will let us avoid writing `db_table` or `from_query` for each query\nt(table) = from_query(table)\n</code></pre> <p>Of note, TidierDB does not yet have an \"interactive mode\" so each example result will be collected.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#loading-data","title":"Loading Data","text":"<p>With Ibis, there are specific functions to read in different file types</p> <pre><code>mtcars = ibis.read_csv(\"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\")\n</code></pre> <p>In TidierDB, there is only <code>db_table</code>, which determines the file type and generates the syntax appropriate for the backend in use.</p> <pre><code>mtcars = db_table(db, \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#previewing-the-data","title":"Previewing the data","text":"<p>TidierDB and Ibis use <code>head</code>/<code>@head</code> to preview the first rows of a dataset. 
Ibis</p> <pre><code>mtcars.head(6)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model             \u2503 mpg     \u2503 cyl   \u2503 disp    \u2503 hp    \u2503 drat    \u2503 wt      \u2503 qsec    \u2503 vs    \u2503 am    \u2503 gear  \u2503 carb  \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502 float64 \u2502 int64 \u2502 float64 \u2502 int64 \u2502 float64 \u2502 float64 \u2502 float64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502 int64 
\u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502    21.0 \u2502     6 \u2502   160.0 \u2502   110 \u2502    3.90 \u2502   2.620 \u2502   16.46 \u2502     0 \u2502     1 \u2502     4 \u2502     4 \u2502\n\u2502 Mazda RX4 Wag     \u2502    21.0 \u2502     6 \u2502   160.0 \u2502   110 \u2502    3.90 \u2502   2.875 \u2502   17.02 \u2502     0 \u2502     1 \u2502     4 \u2502     4 \u2502\n\u2502 Datsun 710        \u2502    22.8 \u2502     4 \u2502   108.0 \u2502    93 \u2502    3.85 \u2502   2.320 \u2502   18.61 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Hornet 4 Drive    \u2502    21.4 \u2502     6 \u2502   258.0 \u2502   110 \u2502    3.08 \u2502   3.215 \u2502   19.44 \u2502     1 \u2502     0 \u2502     3 \u2502     1 \u2502\n\u2502 Hornet Sportabout \u2502    18.7 \u2502     8 \u2502   360.0 \u2502   175 \u2502    3.15 \u2502   3.440 \u2502   17.02 \u2502     0 \u2502     0 \u2502     3 \u2502     2 \u2502\n\u2502 Valiant           \u2502    18.1 \u2502     6 \u2502   225.0 \u2502   105 \u2502    2.76 \u2502   3.460 \u2502   20.22 \u2502     1 \u2502     0 \u2502     3 \u2502     1 
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>TidierDB</p> <pre><code>@chain t(mtcars) @head(6) @collect\n</code></pre> <pre><code>6\u00d712 DataFrame\n Row \u2502 model              mpg       cyl     disp      hp      drat      wt        qsec      vs      am      gear    carb\n     \u2502 String?            Float64?  Int64?  Float64?  Int64?  Float64?  Float64?  Float64?  Int64?  Int64?  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4              21.0       6     160.0     110      3.9      2.62      16.46       0       1       4       4\n   2 \u2502 Mazda RX4 Wag          21.0       6     160.0     110      3.9      2.875     17.02       0       1       4       4\n   3 \u2502 Datsun 710             22.8       4     108.0      93      3.85     2.32      18.61       1       1       4       1\n   4 \u2502 Hornet 4 Drive         21.4       6     258.0     110      3.08     3.215     19.44       1       0       3       1\n   5 \u2502 Hornet Sportabout      18.7       8     360.0     175      3.15     3.44      17.02       0       0       3       2\n   6 \u2502 Valiant                18.1       6     225.0     105      2.76     3.46      20.22       1       0       3       1\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#filtering","title":"Filtering","text":"<p>The example below demonstrates how to filter using multiple criteria in both Ibis and TidierData Ibis</p> <pre><code>mtcars.filter(((_.mpg &gt; 22) &amp; (_.drat &gt; 4) | (_.hp == 113)))\n</code></pre> 
<pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model          \u2503 mpg     \u2503 cyl   \u2503 disp    \u2503 hp    \u2503 drat    \u2503 wt      \u2503 qsec    \u2503 vs    \u2503 am    \u2503 gear  \u2503 carb  \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string         \u2502 float64 \u2502 int64 \u2502 float64 \u2502 int64 \u2502 float64 \u2502 float64 \u2502 float64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502 int64 
\u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Lotus Europa   \u2502    30.4 \u2502     4 \u2502    95.1 \u2502   113 \u2502    3.77 \u2502   1.513 \u2502   16.90 \u2502     1 \u2502     1 \u2502     5 \u2502     2 \u2502\n\u2502 Fiat 128       \u2502    32.4 \u2502     4 \u2502    78.7 \u2502    66 \u2502    4.08 \u2502   2.200 \u2502   19.47 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Honda Civic    \u2502    30.4 \u2502     4 \u2502    75.7 \u2502    52 \u2502    4.93 \u2502   1.615 \u2502   18.52 \u2502     1 \u2502     1 \u2502     4 \u2502     2 \u2502\n\u2502 Toyota Corolla \u2502    33.9 \u2502     4 \u2502    71.1 \u2502    65 \u2502    4.22 \u2502   1.835 \u2502   19.90 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Fiat X1-9      \u2502    27.3 \u2502     4 \u2502    79.0 \u2502    66 \u2502    4.08 \u2502   1.935 \u2502   18.90 \u2502     1 \u2502     1 \u2502     4 \u2502     1 \u2502\n\u2502 Porsche 914-2  \u2502    26.0 \u2502     4 \u2502   120.3 \u2502    91 \u2502    4.43 \u2502   2.140 \u2502   16.70 \u2502     0 \u2502     1 \u2502     5 \u2502     2 
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>TidierDB</p> <pre><code>@chain t(mtcars) begin\n       @filter((mpg &gt; 22 &amp;&amp; drat &gt; 4) || hp == 113)\n       @collect\nend\n</code></pre> <pre><code>6\u00d712 DataFrame\n Row \u2502 model           mpg       cyl     disp      hp      drat      wt        qsec      vs      am      gear    carb\n     \u2502 String?         Float64?  Int64?  Float64?  Int64?  Float64?  Float64?  Float64?  Int64?  Int64?  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Lotus Europa        30.4       4      95.1     113      3.77     1.513     16.9        1       1       5       2\n   2 \u2502 Fiat 128            32.4       4      78.7      66      4.08     2.2       19.47       1       1       4       1\n   3 \u2502 Honda Civic         30.4       4      75.7      52      4.93     1.615     18.52       1       1       4       2\n   4 \u2502 Toyota Corolla      33.9       4      71.1      65      4.22     1.835     19.9        1       1       4       1\n   5 \u2502 Fiat X1-9           27.3       4      79.0      66      4.08     1.935     18.9        1       1       4       1\n   6 \u2502 Porsche 914-2       26.0       4     120.3      91      4.43     2.14      16.7        0       1       5       2\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#creating-new-columns","title":"Creating new columns","text":"<p>Both TidierDB and Ibis use <code>mutate</code>/<code>@mutate</code> to add new columns Ibis</p> <pre><code>(\n   mtcars\n        .mutate(kpg = _.mpg * 1.61)\n        .select(\"model\", \"kpg\")\n)\n</code></pre> 
<pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model             \u2503 kpg     \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502 float64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502  33.810 \u2502\n\u2502 Mazda RX4 Wag     \u2502  33.810 \u2502\n\u2502 Datsun 710        \u2502  36.708 \u2502\n\u2502 Hornet 4 Drive    \u2502  34.454 \u2502\n\u2502 Hornet Sportabout \u2502  30.107 \u2502\n\u2502 Valiant           \u2502  29.141 \u2502\n\u2502 Duster 360        \u2502  23.023 \u2502\n\u2502 Merc 240D         \u2502  39.284 \u2502\n\u2502 Merc 230          \u2502  36.708 \u2502\n\u2502 Merc 280          \u2502  30.912 \u2502\n\u2502 \u2026                 \u2502       \u2026 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>TidierDB</p> <pre><code>@chain t(mtcars) begin\n       @mutate(kpg = mpg * 1.61)\n       @select(model, kpg)\n       @collect\nend\n</code></pre> <pre><code>32\u00d72 DataFrame\n Row \u2502 model              kpg\n     \u2502 String?            
Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4            33.81\n   2 \u2502 Mazda RX4 Wag        33.81\n   3 \u2502 Datsun 710           36.708\n   4 \u2502 Hornet 4 Drive       34.454\n   5 \u2502 Hornet Sportabout    30.107\n   6 \u2502 Valiant              29.141\n  \u22ee  \u2502         \u22ee             \u22ee\n  27 \u2502 Porsche 914-2        41.86\n  28 \u2502 Lotus Europa         48.944\n  29 \u2502 Ford Pantera L       25.438\n  30 \u2502 Ferrari Dino         31.717\n  31 \u2502 Maserati Bora        24.15\n  32 \u2502 Volvo 142E           34.454\n                    20 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#sorting-columns","title":"Sorting columns","text":"<p>Ibis uses <code>order_by</code> similar to SQLs <code>ORDER BY</code> Ibis</p> <pre><code>mtcars.order_by(_.mpg)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model               \u2503 mpg     \u2503 cyl   \u2503 disp    \u2503 hp    \u2503 drat    \u2503 wt      \u2503 qsec    \u2503 vs    \u2503 am    \u2503 gear  \u2503 carb  
\u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string              \u2502 float64 \u2502 int64 \u2502 float64 \u2502 int64 \u2502 float64 \u2502 float64 \u2502 float64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502 int64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Cadillac Fleetwood  \u2502    10.4 \u2502     8 \u2502   472.0 \u2502   205 \u2502    2.93 \u2502   5.250 \u2502   17.98 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Lincoln Continental \u2502    10.4 \u2502     8 \u2502   460.0 \u2502   215 \u2502    3.00 \u2502   5.424 \u2502   17.82 \u2502     0 \u2502     0 
\u2502     3 \u2502     4 \u2502\n\u2502 Camaro Z28          \u2502    13.3 \u2502     8 \u2502   350.0 \u2502   245 \u2502    3.73 \u2502   3.840 \u2502   15.41 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Duster 360          \u2502    14.3 \u2502     8 \u2502   360.0 \u2502   245 \u2502    3.21 \u2502   3.570 \u2502   15.84 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Chrysler Imperial   \u2502    14.7 \u2502     8 \u2502   440.0 \u2502   230 \u2502    3.23 \u2502   5.345 \u2502   17.42 \u2502     0 \u2502     0 \u2502     3 \u2502     4 \u2502\n\u2502 Maserati Bora       \u2502    15.0 \u2502     8 \u2502   301.0 \u2502   335 \u2502    3.54 \u2502   3.570 \u2502   14.60 \u2502     0 \u2502     1 \u2502     5 \u2502     8 \u2502\n\u2502 Merc 450SLC         \u2502    15.2 \u2502     8 \u2502   275.8 \u2502   180 \u2502    3.07 \u2502   3.780 \u2502   18.00 \u2502     0 \u2502     0 \u2502     3 \u2502     3 \u2502\n\u2502 AMC Javelin         \u2502    15.2 \u2502     8 \u2502   304.0 \u2502   150 \u2502    3.15 \u2502   3.435 \u2502   17.30 \u2502     0 \u2502     0 \u2502     3 \u2502     2 \u2502\n\u2502 Dodge Challenger    \u2502    15.5 \u2502     8 \u2502   318.0 \u2502   150 \u2502    2.76 \u2502   3.520 \u2502   16.87 \u2502     0 \u2502     0 \u2502     3 \u2502     2 \u2502\n\u2502 Ford Pantera L      \u2502    15.8 \u2502     8 \u2502   351.0 \u2502   264 \u2502    4.22 \u2502   3.170 \u2502   14.50 \u2502     0 \u2502     1 \u2502     5 \u2502     4 \u2502\n\u2502 \u2026                   \u2502       \u2026 \u2502     \u2026 \u2502       \u2026 \u2502     \u2026 \u2502       \u2026 \u2502       \u2026 \u2502       \u2026 \u2502     \u2026 \u2502     \u2026 \u2502     \u2026 \u2502     \u2026 
\u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>While TidierDB uses <code>@arrange</code> like TidierData.jl TidierDB</p> <pre><code>@chain t(mtcars) @arrange(mpg) @collect\n</code></pre> <pre><code>32\u00d712 DataFrame\n Row \u2502 model                mpg       cyl     disp      hp      drat      wt        qsec      vs      am      gear    carb\n     \u2502 String?              Float64?  Int64?  Float64?  Int64?  Float64?  Float64?  Float64?  Int64?  Int64?  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Cadillac Fleetwood       10.4       8     472.0     205      2.93     5.25      17.98       0       0       3       4\n   2 \u2502 Lincoln Continental      10.4       8     460.0     215      3.0      5.424     17.82       0       0       3       4\n   3 \u2502 Camaro Z28               13.3       8     350.0     245      3.73     3.84      15.41       0       0       3       4\n   4 \u2502 Duster 360               14.3       8     360.0     245      3.21     3.57      15.84       0       0       3       4\n   5 \u2502 Chrysler Imperial        14.7       8     440.0     230      3.23     5.345     17.42       0       0       3       4\n   6 \u2502 Maserati Bora            15.0       8     301.0     335      3.54     3.57      14.6        0       1       5       8\n  \u22ee  \u2502          \u22ee              \u22ee        \u22ee        \u22ee        \u22ee        \u22ee         \u22ee         \u22ee        \u22ee       \u22ee       \u22ee       \u22ee\n  27 \u2502 Porsche 914-2            26.0       4     120.3      91      4.43     2.14      16.7        0       1       5       2\n  28 \u2502 Fiat X1-9                27.3       4      79.0      66      4.08     1.935     18.9        1       1       4       
1\n  29 \u2502 Honda Civic              30.4       4      75.7      52      4.93     1.615     18.52       1       1       4       2\n  30 \u2502 Lotus Europa             30.4       4      95.1     113      3.77     1.513     16.9        1       1       5       2\n  31 \u2502 Fiat 128                 32.4       4      78.7      66      4.08     2.2       19.47       1       1       4       1\n  32 \u2502 Toyota Corolla           33.9       4      71.1      65      4.22     1.835     19.9        1       1       4       1\n                                                                                                              20 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#selecting-columns","title":"Selecting columns","text":"<p>In Ibis, columns must be prefixed with the table name, or in this case <code>_</code>, or they can be given as a string. Finally, using helper functions like <code>startswith</code> requires importing selectors as above. 
Ibis</p> <pre><code>mtcars.select(s.startswith(\"m\"), \"drat\", _.wt)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 model             \u2503 mpg     \u2503 drat    \u2503 wt      \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502 float64 \u2502 float64 \u2502 float64 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502    21.0 \u2502    3.90 \u2502   2.620 \u2502\n\u2502 Mazda RX4 Wag     \u2502    21.0 \u2502    3.90 \u2502   2.875 \u2502\n\u2502 Datsun 710        \u2502    22.8 \u2502    3.85 \u2502   2.320 \u2502\n\u2502 Hornet 4 Drive    \u2502    21.4 \u2502    3.08 \u2502   3.215 \u2502\n\u2502 Hornet Sportabout \u2502    18.7 \u2502    3.15 \u2502   3.440 \u2502\n\u2502 Valiant           \u2502    18.1 \u2502    2.76 \u2502   3.460 \u2502\n\u2502 Duster 360        \u2502    14.3 \u2502    3.21 \u2502   3.570 \u2502\n\u2502 Merc 240D         \u2502    24.4 \u2502    3.69 \u2502   3.190 \u2502\n\u2502 Merc 230          \u2502    22.8 \u2502    3.92 \u2502   3.150 \u2502\n\u2502 Merc 280          \u2502    19.2 \u2502    3.92 \u2502   3.440 \u2502\n\u2502 \u2026                 \u2502       
\u2026 \u2502       \u2026 \u2502       \u2026 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>TidierDB does not require names to be prefixed and, like TidierData, tidy column selection with <code>starts_with</code>, <code>ends_with</code>, and <code>contains</code> is supported at base. TidierDB also supports providing column names as strings, although this would only be needed in the setting of renaming a column with a space in it. TidierDB</p> <pre><code>@chain t(mtcars) @select(starts_with(\"m\"), \"drat\", wt) @collect\n</code></pre> <pre><code>32\u00d74 DataFrame\n Row \u2502 model              mpg       drat      wt\n     \u2502 String?            Float64?  Float64?  Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4              21.0      3.9      2.62\n   2 \u2502 Mazda RX4 Wag          21.0      3.9      2.875\n   3 \u2502 Datsun 710             22.8      3.85     2.32\n   4 \u2502 Hornet 4 Drive         21.4      3.08     3.215\n   5 \u2502 Hornet Sportabout      18.7      3.15     3.44\n   6 \u2502 Valiant                18.1      2.76     3.46\n  \u22ee  \u2502         \u22ee             \u22ee         \u22ee         \u22ee\n  27 \u2502 Porsche 914-2          26.0      4.43     2.14\n  28 \u2502 Lotus Europa           30.4      3.77     1.513\n  29 \u2502 Ford Pantera L         15.8      4.22     3.17\n  30 \u2502 Ferrari Dino           19.7      3.62     2.77\n  31 
\u2502 Maserati Bora          15.0      3.54     3.57\n  32 \u2502 Volvo 142E             21.4      4.11     2.78\n                                        20 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#multi-step-queries-and-summarizing","title":"Multi step queries and summarizing","text":"<p>Aggregating data is done with <code>aggregate</code> in Ibis and <code>@summarize</code> in TidierDB. To group data, Ibis uses <code>by =</code> within the <code>aggregate</code> call vs TidierDB adheres to <code>@group_by</code> convention Ibis</p> <pre><code>mtcars.aggregate(\n    total_hp=_.hp.sum(),\n    avg_hp=_.hp.mean(),\n    having=_.hp.sum() &lt; 1000,\n    by=['cyl']\n)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 cyl   \u2503 total_hp \u2503 avg_hp     \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2547\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 int64 \u2502 int64    \u2502 float64    \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502     6 \u2502      856 \u2502 122.285714 \u2502\n\u2502     4 \u2502      909 \u2502  82.636364 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>In TidierDB, <code>@filter</code> will automatically determine whether the criteria belong in a <code>WHERE</code> or <code>HAVING</code> SQL clause. 
TidierDB</p> <pre><code>@chain t(mtcars) begin\n    @group_by(cyl)\n    @summarize(total_hp = sum(hp),\n               avg_hp = avg(hp))\n    @filter(total_hp &lt; 1000)\n    @collect\nend\n</code></pre> <pre><code>2\u00d73 DataFrame\n Row \u2502 cyl     total_hp  avg_hp\n     \u2502 Int64?  Int128?   Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      6       856  122.286\n   2 \u2502      4       909   82.6364\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/ibis_comp/#renaming-columns","title":"Renaming columns","text":"<p>Both tools use <code>rename</code>/@rename to rename columns Ibis</p> <pre><code>mtcars.rename(make_model = \"model\").select(_.make_model)\n</code></pre> <pre><code>\u250f\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2513\n\u2503 make_model        \u2503\n\u2521\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2529\n\u2502 string            \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 Mazda RX4         \u2502\n\u2502 Mazda RX4 Wag     \u2502\n\u2502 Datsun 710        \u2502\n\u2502 Hornet 4 Drive    \u2502\n\u2502 Hornet Sportabout \u2502\n\u2502 Valiant           \u2502\n\u2502 Duster 360        \u2502\n\u2502 Merc 240D         \u2502\n\u2502 Merc 230          \u2502\n\u2502 Merc 280          \u2502\n\u2502 \u2026                 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>TidierDB</p> <pre><code>@chain t(mtcars) @rename(model_make = model) @select(model_make) @collect\n</code></pre> 
<pre><code>32\u00d71 DataFrame\n Row \u2502 model_make\n     \u2502 String?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Mazda RX4\n   2 \u2502 Mazda RX4 Wag\n   3 \u2502 Datsun 710\n   4 \u2502 Hornet 4 Drive\n   5 \u2502 Hornet Sportabout\n   6 \u2502 Valiant\n  \u22ee  \u2502         \u22ee\n  27 \u2502 Porsche 914-2\n  28 \u2502 Lotus Europa\n  29 \u2502 Ford Pantera L\n  30 \u2502 Ferrari Dino\n  31 \u2502 Maserati Bora\n  32 \u2502 Volvo 142E\n          20 rows omitted\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/key_differences/","title":"Key Differences from TidierData.jl","text":"<p>There are a few important syntax and behavior differences between TidierDB.jl and TidierData.jl outlined below.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#creating-a-database","title":"Creating a database","text":"<p>For these examples we will use DuckDB, the default backend, although SQLite, Postgres, MySQL, MariaDB, MSSQL, and ClickHouse are possible. If you have an existing DuckDB connection, then this step is not required. For these examples, we will create a data frame and copy it to an in-memory DuckDB database.</p> <pre><code>using DataFrames, TidierDB\n\ndf = DataFrame(id = [string('A' + i \u00f7 26, 'A' + i % 26) for i in 0:9],\n                        groups = [i % 2 == 0 ? \"aa\" : \"bb\" for i in 1:10],\n                        value = repeat(1:5, 2),\n                        percent = 0.1:0.1:1.0);\n\ndb = connect(duckdb());\n\ncopy_to(db, df, \"df_mem\"); # copying over the data frame to an in-memory database\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#row-ordering","title":"Row ordering","text":"<p>DuckDB benefits from aggressive parallelization of pipelines. 
This means that if you have multiple threads enabled in Julia, which you can check or set using <code>Threads.nthreads()</code>, DuckDB will use multiple threads. However, because many operations are multi-threaded, the resulting row order is inconsistent. If row order needs to be deterministic for your use case, make sure to apply an <code>@arrange(column_name_1, column_name_2, etc...)</code> prior to collecting the results.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#starting-a-chain","title":"Starting a chain","text":"<p>When using TidierDB, <code>db_table(connection, :table_name)</code> is used to start a chain.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#grouped-mutation","title":"Grouped mutation","text":"<p>In TidierDB, when performing <code>@group_by</code> then <code>@mutate</code>, the table will be ungrouped after applying all of the mutations in the clause to the grouped data. To perform subsequent grouped operations, the user would have to regroup the data. This is demonstrated below.</p> <pre><code>@chain db_table(db, :df_mem) begin\n    @group_by(groups)\n    @summarize(mean_percent = mean(percent))\n    @collect\n end\n</code></pre> 2\u00d72 DataFrame Rowgroupsmean_percentString?Float64?1bb0.52aa0.6 <p>Regrouping following <code>@mutate</code></p> <pre><code>@chain db_table(db, :df_mem) begin\n    @group_by(groups)\n    @mutate(max = maximum(percent), min = minimum(percent))\n    @group_by(groups)\n    @summarise(mean_percent = mean(percent))\n    @collect\nend\n</code></pre> 2\u00d72 DataFrame Rowgroupsmean_percentString?Float64?1bb0.52aa0.6 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#joining","title":"Joining","text":"<p>There is one key difference for joining:</p> <p>The column on both the new and old table must be specified. 
They do not need to be the same, and given SQL behavior where both columns are kept when joining two tables, it is preferable if they have different names. This avoids \"ambiguous reference\" errors that would otherwise come up and complicate the use of tidy selection for columns. Athena has an additional slight difference given the need for parameters, which is covered in the Athena documentation page.</p> <pre><code>df2 = DataFrame(id2 = [\"AA\", \"AC\", \"AE\", \"AG\", \"AI\", \"AK\", \"AM\"],\n                category = [\"X\", \"Y\", \"X\", \"Y\", \"X\", \"Y\", \"X\"],\n                score = [88, 92, 77, 83, 95, 68, 74]);\n\ncopy_to(db, df2, \"df_join\");\n\n@chain db_table(db, :df_mem) begin\n    @left_join(df_join, id2, id)\n    @collect\nend\n</code></pre> 10\u00d77 DataFrame Rowidgroupsvaluepercentid2categoryscoreString?String?Int64?Float64?String?String?Int64?1AAbb10.1AAX882ACbb30.3ACY923AEbb50.5AEX774AGbb20.7AGY835AIbb40.9AIX956ABaa20.2missingmissingmissing7ADaa40.4missingmissingmissing8AFaa10.6missingmissingmissing9AHaa30.8missingmissingmissing10AJaa51.0missingmissingmissing <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#differences-in-case_when","title":"Differences in <code>case_when()</code>","text":"<p>In TidierDB, after the clause is completed, the result for the new column should be separated by a comma <code>,</code> in contrast to TidierData.jl, where the result for the new column is separated by a <code>=&gt;</code> .</p> <pre><code>@chain db_table(db, :df_mem) begin\n    @mutate(new_col = case_when(percent &gt; .5, \"Pass\",  # in TidierData, percent &gt; .5 =&gt; \"Pass\",\n                                percent &lt;= .5, \"Try Again\", # percent &lt;= .5 =&gt; \"Try Again\"\n                                true, \"middle\"))\n    @collect\n end\n</code></pre> 10\u00d75 DataFrame Rowidgroupsvaluepercentnew_colString?String?Int64?Float64?String?1AAbb10.1Try Again2ABaa20.2Try Again3ACbb30.3Try Again4ADaa40.4Try 
Again5AEbb50.5Try Again6AFaa10.6Pass7AGbb20.7Pass8AHaa30.8Pass9AIbb40.9Pass10AJaa51.0Pass <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#interpolation","title":"Interpolation","text":"<p>To use !! Interpolation, instead of being able to define the alternate names/value in the global context, the user has to use <code>@interpolate</code>. This will hopefully be fixed in future versions. Otherwise, the behavior is generally the same, although this creates friction around calling functions.</p> <p>Also, when using interpolation with exponents, the interpolated value must go inside parentheses.</p> <pre><code>@interpolate((test, :percent)); # this still supports strings, vectors of names, and values\n\n@chain db_table(db, :df_mem) begin\n    @mutate(new_col = case_when((!!test)^2 &gt; .5, \"Pass\",\n                                (!!test)^2 &lt; .5, \"Try Again\",\n                                \"middle\"))\n    @collect\nend\n</code></pre> <pre><code>10\u00d75 DataFrame\n Row \u2502 id       groups   value   percent   new_col\n     \u2502 String?  String?  Int64?  Float64?  
String?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 AA       bb            1       0.1  Try Again\n   2 \u2502 AB       aa            2       0.2  Try Again\n   3 \u2502 AC       bb            3       0.3  Try Again\n  \u22ee  \u2502    \u22ee        \u22ee       \u22ee        \u22ee          \u22ee\n   8 \u2502 AH       aa            3       0.8  Pass\n   9 \u2502 AI       bb            4       0.9  Pass\n  10 \u2502 AJ       aa            5       1.0  Pass\n                                       4 rows omitted\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/key_differences/#slicing-ties","title":"Slicing ties","text":"<p><code>@slice_min()</code> and <code>@slice_max()</code> will always return ties due to SQL behavior.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/outofmemex/","title":"Working With Larger than RAM Datasets","text":"<p>While using the DuckDB backend, TidierDB's lazy interface enables querying datasets larger than your available RAM.</p> <p>To illustrate this, we will recreate the Hugging Face x Polars example. 
The final table results are shown below and in this Hugging Face x DuckDB example.</p> <p>First we will load TidierDB, set up a local database and then set the URLs for the 2 training datasets from huggingface.co</p> <pre><code>using TidierDB\ndb = connect(duckdb())\n\nurls = [\"https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0000.parquet\",\n \"https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0001.parquet\"];\n</code></pre> <p>Here, we pass the vector of URLs to <code>db_table</code>, which will not copy them into memory. Since these datasets are so large, we will also set <code>stream = true</code> in <code>@collect</code> to stream the results. If we wanted to read all the files in the folder we could have replaced the <code>0000</code> with <code>*</code> (wildcard): <code>db_table(db, \"Path/to/folder/*.parquet\")</code>. Of note, reading these files from URLs is not as rapid as reading them from local files.</p> <pre><code>@chain db_table(db, urls) begin\n    @group_by(horoscope)\n    @summarise(count = n(), avg_blog_length = mean(length(text)))\n    @arrange(desc(count))\n    @aside @show_query _\n    @collect(stream = true)\nend\n</code></pre> <p>Placing <code>@aside @show_query _</code> before <code>@collect</code> above lets us see the SQL query and collect it to a local DataFrame at the same time.</p> <pre><code>SELECT horoscope, COUNT(*) AS count, AVG(length(text)) AS avg_blog_length\n        FROM read_parquet(['https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0000.parquet', 'https://huggingface.co/datasets/blog_authorship_corpus/resolve/refs%2Fconvert%2Fparquet/blog_authorship_corpus/train/0001.parquet'])\n        GROUP BY horoscope\n        ORDER BY avg_blog_length DESC\n12\u00d73 DataFrame\n Row \u2502 horoscope    count   avg_blog_length\n     \u2502 
String?      Int64?  Float64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Aquarius      49568         1125.83\n   2 \u2502 Cancer        63512         1097.96\n   3 \u2502 Libra         60304         1060.61\n   4 \u2502 Capricorn     49402         1059.56\n   5 \u2502 Sagittarius   50431         1057.46\n   6 \u2502 Leo           58010         1049.6\n   7 \u2502 Taurus        61571         1022.69\n   8 \u2502 Gemini        52925         1020.26\n   9 \u2502 Scorpio       56495         1014.03\n  10 \u2502 Pisces        53812         1011.75\n  11 \u2502 Virgo         64629          996.684\n  12 \u2502 Aries         69134          918.081\n</code></pre> <p>To learn more about memory efficient queries on larger than RAM files, this blog from DuckDB will help maximize your local <code>db</code>.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/s3viaduckdb/","title":"S3 + DuckDB + TidierDB","text":"<p>TidierDB allows you to leverage DuckDB's seamless database integration.</p> <p>Using DuckDB, you can connect to an AWS or GoogleCloud Database to query directly without making any local copies.</p> <p>You can also use <code>DBInterface.execute</code> to set up any DuckDB database connection you need and then use that db to query with TidierDB</p> <pre><code>using TidierDB\n\n#Connect to Google Cloud via DuckDB\n#google_db = connect(duckdb(), :gbq, access_key=\"string\", secret_key=\"string\")\n\n#Connect to AWS via DuckDB\naws_db = connect(duckdb(), :aws, aws_access_key_id= \"string\",\n                                aws_secret_access_key= \"string\",\n                                aws_region=\"us-east-1\")\ns3_csv_path = \"s3://path/to_data.csv\"\n\n@chain db_table(aws_db, s3_csv_path) 
begin\n    @filter(!starts_with(column1, \"M\"))\n    @group_by(cyl)\n    @summarize(mpg = mean(mpg))\n    @mutate(mpg_squared = mpg^2,\n               mpg_rounded = round(mpg),\n               mpg_efficiency = case_when(\n                                 mpg &gt;= cyl^2 , \"efficient\",\n                                 mpg &lt; 15.2 , \"inefficient\",\n                                 \"moderate\"))\n    @filter(mpg_efficiency in (\"moderate\", \"efficient\"))\n    @arrange(desc(mpg_rounded))\n    @collect\nend\n</code></pre> <pre><code>2\u00d75 DataFrame\n Row \u2502 cyl     mpg       mpg_squared  mpg_rounded  mpg_efficiency\n     \u2502 Int64?  Float64?  Float64?     Float64?     String?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      4   27.3444      747.719         27.0  efficient\n   2 \u2502      6   19.7333      389.404         20.0  moderate\n</code></pre> <p>This page was generated using Literate.jl.</p>"}]}
\ No newline at end of file