From 388ae059cc510de9da44cfbe03d79e7458a41167 Mon Sep 17 00:00:00 2001 From: Philipp Holl Date: Tue, 16 Jan 2024 12:17:33 +0100 Subject: [PATCH] Add named dims references to paper --- paper.bib | 29 +++++++++++++++++++++++++++++ paper.md | 10 +++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/paper.bib b/paper.bib index 72443ea0..5fe8a0b1 100644 --- a/paper.bib +++ b/paper.bib @@ -287,4 +287,33 @@ @software{Jax2018 url = {http://github.com/google/jax}, version = {0.2.5}, year = {2018}, +} + + +@article{xarray2017, + title = {xarray: {N-D} labeled arrays and datasets in {Python}}, + author = {Hoyer, S. and Hamman, J.}, + journal = {Journal of Open Research Software}, + volume = {5}, + number = {1}, + year = {2017}, + publisher = {Ubiquity Press}, + doi = {10.5334/jors.148}, + url = {https://doi.org/10.5334/jors.148} +} + +@misc{NamedTensor, + author = {{Harvard NLP}}, + title = {{NamedTensor}}, + url = {https://github.com/harvardnlp/NamedTensor}, + year = {2019}, + note = {Accessed on 16 Jan 2024} +} + +@misc{Einops2018, + author = {Rogozhnikov, Aleksey}, + title = {Einops}, + url = {https://github.com/arogozhnikov/einops}, + year = {2018}, + note = {Accessed on 16 Jan 2024} } \ No newline at end of file diff --git a/paper.md b/paper.md index 264f3dfb..f41f33e3 100644 --- a/paper.md +++ b/paper.md @@ -125,11 +125,15 @@ This way, an easy-to-use PyTorch network can interact with a Jax simulation for ### Named dimensions -In $\Phi_\textrm{ML}$, dimensions are not referenced by their index but by name instead, similar to pandas [@Pandas2010]. +In $\Phi_\textrm{ML}$, dimensions are not referenced by their index but by name instead. We make dimension names mandatory for all dimensions, forcing users to explicitly document the meaning of each dimension upon creation. The name information gets preserved by tensor manipulations and can be inspected at any later point, e.g. by printing it or using a debugger. 
-While similar concepts exist for all backend libraries, these features are limited and, consequently, have not been widely adopted. -$\Phi_\textrm{ML}$ introduces the slicing syntax `tensor.dim_name[start:stop:step]`, replacing the less readable slices `tensor[..., start:stop:step, :]`, and supports dimension names in all functions as first-class citizens. +Named dimensions are also present in other numerics libraries, such as pandas [@Pandas2010], xarray [@xarray2017], and einops [@Einops2018], +and are available for PyTorch as an add-on [@NamedTensor]. +However, these libraries make dimension names optional and, consequently, cannot support them to the same extent that $\Phi_\textrm{ML}$ can, +preventing mainstream adoption. +In $\Phi_\textrm{ML}$, dimension names are one part of a carefully designed set of tools, making them more intuitive and useful than in previous libraries. +For instance, $\Phi_\textrm{ML}$ introduces the convenience slicing syntax `tensor.dim_name[start:stop:step]`, replacing the less readable slices `tensor[..., start:stop:step, :]`, and supports dimension names in all functions as first-class citizens. While naming dimensions adds a small amount of additional code, this is easily outweighed by the gains in readability and ease of debugging. Furthermore, dimension names enable automatic reshaping, which eliminates the need for reshaping operations in user code, often significantly reducing the amount of required boilerplate code.