Merge pull request #19 from adtzlr/change-full-output-hessian
Add and change default `tr.hessian(fun, full_output=False)`
adtzlr authored Dec 10, 2022
2 parents 0fb28f1 + 8819b4d commit 6d1e96c
Showing 7 changed files with 47 additions and 35 deletions.
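
In effect, `tr.gradient` and `tr.hessian` now return only the highest derivative by default; the previous tuple-style return (hessian, gradient, function value) is opt-in via the new `full_output=True` argument. A minimal sketch of the two calling conventions (assuming `trace` is importable from `tensortrax.math`, matching the `trace` used in the README snippets below):

```python
import numpy as np
import tensortrax as tr
from tensortrax.math import trace  # assumed import path for the README's `trace`

fun = lambda F: trace(F.T() @ F)
x = np.eye(3)

# new default (this PR): only the hessian is returned
hess = tr.hessian(fun)(x)

# previous behaviour, now opt-in
hess, grad, func = tr.hessian(fun, full_output=True)(x)
grad, func = tr.gradient(fun, full_output=True)(x)
```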
20 changes: 12 additions & 8 deletions README.md
@@ -44,7 +44,7 @@ def fun(F, mu=1):
     return mu / 2 * (J ** (-2 / 3) * I1 - 3)
 ```
 
-The hessian of the scalar-valued function w.r.t. the chosen function argument (here, `wrt=0` or `wrt="F"`) is evaluated by variational calculus (Forward Mode AD implemented as Hyper-Dual Tensors). The function is called once for each component of the hessian (symmetry is taken care of). The function and the gradient are evaluated with no additional computational cost.
+The hessian of the scalar-valued function w.r.t. the chosen function argument (here, `wrt=0` or `wrt="F"`) is evaluated by variational calculus (Forward Mode AD implemented as Hyper-Dual Tensors). The function is called once for each component of the hessian (symmetry is taken care of). The function and the gradient are evaluated with no additional computational cost. Optionally, the function calls are executed in parallel (threaded).
 
 ```python
 import numpy as np
@@ -56,8 +56,9 @@ for a in range(3):
     F[a, a] += 1
 
 # W = tr.function(fun, wrt=0, ntrax=2)(F)
-# dWdF, W = tr.gradient(fun, wrt=0, ntrax=2)(F)
-d2WdF2, dWdF, W = tr.hessian(fun, wrt="F", ntrax=2)(F=F)
+# dWdF = tr.gradient(fun, wrt=0, ntrax=2)(F)
+# d2WdF2, dWdF, W = tr.hessian(fun, wrt="F", ntrax=2, full_output=True)(F=F)
+d2WdF2 = tr.hessian(fun, wrt="F", ntrax=2, parallel=False)(F=F)
 ```
 
 # Theory
@@ -152,18 +153,21 @@ P_12 = δ(I1_C) # (= Δ(I1_C))
 A_1223 = Δδ(I1_C)
 ```
 
-To obtain full gradients and hessians in one function call, `tensortrax` provides helpers (decorators) which handle the multiple function calls. Optionally, the function calls are executed in parallel (threaded).
+To obtain full gradients and hessians in one function call, `tensortrax` provides helpers (decorators) which handle the multiple function calls.
 
 ```python
-grad, func = tr.gradient(lambda F: trace(F.T() @ F), wrt=0, ntrax=0, parallel=False)(x)
-hess, grad, func = tr.hessian(lambda F: trace(F.T() @ F))(x)
+fun = lambda F: trace(F.T() @ F)
+
+func = tr.function(fun)(x)
+grad = tr.gradient(fun)(x)
+hess = tr.hessian(fun)(x)
 ```
 
 Evaluate the gradient- as well as the hessian-vector-product:
 
 ```python
-gvp = tr.gradient_vector_product(lambda F: trace(F.T() @ F))(x, δx=x)
-hvp = tr.hessian_vector_product(lambda F: trace(F.T() @ F))(x, δx=x, Δx=x)
+gvp = tr.gradient_vector_product(fun)(x, δx=x)
+hvp = tr.hessian_vector_product(fun)(x, δx=x, Δx=x)
 ```
 
 # Extensions
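The README paragraph changed above states that the function is called once for each component of the hessian, with symmetry taken care of. As a rough illustration of that component-pair loop (a sketch only, not `tensortrax`'s implementation: hyper-dual tensors obtain the exact second derivative from a single evaluation per pair, whereas this sketch approximates it with four central-difference evaluations):

```python
import numpy as np

def hessian_fd(fun, x, h=1e-4):
    # Build the hessian one component pair (a, b) at a time and mirror the
    # result to (b, a) -- only the upper triangle is ever evaluated.
    n = x.size
    v = x.ravel().astype(float)
    f = lambda w: fun(w.reshape(x.shape))
    H = np.zeros((n, n))
    for a in range(n):
        for b in range(a, n):
            ea = np.zeros(n)
            eb = np.zeros(n)
            ea[a] = h
            eb[b] = h
            # central-difference stand-in for the exact hyper-dual evaluation
            d2 = (f(v + ea + eb) - f(v + ea - eb)
                  - f(v - ea + eb) + f(v - ea - eb)) / (4.0 * h * h)
            H[a, b] = H[b, a] = d2
    return H.reshape(*x.shape, *x.shape)

W = lambda F: np.trace(F.T @ F)  # d2W/dF2 is constant: 2 on the diagonal
print(hessian_fd(W, np.eye(3)).shape)  # (3, 3, 3, 3)
```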
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "tensortrax"
-version = "0.1.9"
+version = "0.2.0"
 description = "Math on (Hyper-Dual) Tensors with Trailing Axes"
 readme = "README.md"
 requires-python = ">=3.7"
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = tensortrax
-version = 0.1.9
+version = 0.2.0
 author = Andreas Dutzler
 author_email = a.dutzler@gmail.com
 description = Math on (Hyper-Dual) Tensors with Trailing Axes
22 changes: 14 additions & 8 deletions tensortrax/_evaluate.py
@@ -53,7 +53,7 @@ def evaluate_function(*args, **kwargs):
     return evaluate_function
 
 
-def gradient(fun, wrt=0, ntrax=0, parallel=False):
+def gradient(fun, wrt=0, ntrax=0, parallel=False, full_output=False):
     "Evaluate the gradient of a scalar-valued function."
 
     def evaluate_gradient(*args, **kwargs):
@@ -88,12 +88,15 @@ def kernel(a, wrt, δx, Δx, ntrax, args, kwargs):
         for th in threads:
             th.join()
 
-        return np.array(dfdx).reshape(*t.shape, *t.trax), fx[0]
+        if full_output:
+            return np.array(dfdx).reshape(*t.shape, *t.trax), fx[0]
+        else:
+            return np.array(dfdx).reshape(*t.shape, *t.trax)
 
     return evaluate_gradient
 
 
-def hessian(fun, wrt=0, ntrax=0, parallel=False):
+def hessian(fun, wrt=0, ntrax=0, parallel=False, full_output=False):
     "Evaluate the hessian of a scalar-valued function."
 
     def evaluate_hessian(*args, **kwargs):
@@ -130,11 +133,14 @@ def kernel(a, b, wrt, δx, Δx, ntrax, args, kwargs):
         for th in threads:
             th.join()
 
-        return (
-            np.array(d2fdx2).reshape(*t.shape, *t.shape, *t.trax),
-            np.array(dfdx).reshape(*t.shape, *t.trax),
-            fx[0],
-        )
+        if full_output:
+            return (
+                np.array(d2fdx2).reshape(*t.shape, *t.shape, *t.trax),
+                np.array(dfdx).reshape(*t.shape, *t.trax),
+                fx[0],
+            )
+        else:
+            return np.array(d2fdx2).reshape(*t.shape, *t.shape, *t.trax)
 
     return evaluate_hessian
 
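The threaded path visible above (`threads`, `th.join()`) dispatches one kernel call per hessian component and joins all threads before assembling the result array; since the kernels are NumPy-heavy, threads can presumably overlap where NumPy releases the GIL. A generic sketch of that dispatch pattern (the name `run_kernels` is hypothetical, not `tensortrax` API):

```python
import threading

def run_kernels(kernel, indices, parallel=False):
    # One kernel call per index; optionally each call runs on its own thread.
    results = [None] * len(indices)

    def task(slot, idx):
        results[slot] = kernel(idx)

    if parallel:
        threads = [
            threading.Thread(target=task, args=(slot, idx))
            for slot, idx in enumerate(indices)
        ]
        for th in threads:
            th.start()
        for th in threads:
            th.join()
    else:
        for slot, idx in enumerate(indices):
            task(slot, idx)
    return results

# usage: a toy kernel over the six upper-triangle pairs of a 3x3 argument
pairs = [(a, b) for a in range(3) for b in range(a, 3)]
out = run_kernels(lambda ab: ab[0] + ab[1], pairs, parallel=True)
```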
6 changes: 3 additions & 3 deletions tensortrax/_tensor.py
@@ -171,17 +171,17 @@ def __setitem__(self, key, value):
         if self.δx[key].shape != δ(value).shape:
             self.δx = np.tile(
                 self.δx.reshape(*self.shape, *np.ones(len(self.trax), dtype=int)),
-                (*np.ones(len(self.shape), dtype=int), *self.trax)
+                (*np.ones(len(self.shape), dtype=int), *self.trax),
             )
         if self.Δx[key].shape != Δ(value).shape:
             self.Δx = np.tile(
                 self.Δx.reshape(*self.shape, *np.ones(len(self.trax), dtype=int)),
-                (*np.ones(len(self.shape), dtype=int), *self.trax)
+                (*np.ones(len(self.shape), dtype=int), *self.trax),
             )
         if self.Δδx[key].shape != Δδ(value).shape:
             self.Δδx = np.tile(
                 self.Δδx.reshape(*self.shape, *np.ones(len(self.trax), dtype=int)),
-                (*np.ones(len(self.shape), dtype=int), *self.trax)
+                (*np.ones(len(self.shape), dtype=int), *self.trax),
             )
         self.δx[key] = δ(value)
         self.Δx[key] = Δ(value)
26 changes: 14 additions & 12 deletions tests/test_hessian.py
@@ -30,8 +30,10 @@ def test_function_gradient_hessian():
     for parallel in [False, True]:
         for fun in [neo_hooke, ogden]:
             ww = tr.function(fun, ntrax=2, parallel=parallel)(F)
-            dwdf, w = tr.gradient(fun, ntrax=2, parallel=parallel)(F)
-            d2WdF2, dWdF, W = tr.hessian(fun, ntrax=2, parallel=parallel)(F)
+            dwdf, w = tr.gradient(fun, ntrax=2, parallel=parallel, full_output=True)(F)
+            d2WdF2, dWdF, W = tr.hessian(
+                fun, ntrax=2, parallel=parallel, full_output=True
+            )(F)
 
             assert W.shape == (1, 1)
             assert dWdF.shape == (3, 3, 1, 1)
@@ -48,17 +50,17 @@ def test_trig():

     for fun in [trig]:
         ww = tr.function(fun)(F)
-        dwdf, w = tr.gradient(fun)(F)
-        d2WdF2, dWdF, W = tr.hessian(fun)(F)
+        dwdf = tr.gradient(fun, full_output=False)(F)
+        d2WdF2 = tr.hessian(fun, full_output=False)(F)
 
 
 def test_repeated_eigvals():
 
     F = np.eye(3)
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
@@ -68,8 +70,8 @@ def test_repeated_eigvals():
     F[2, 2] = 2
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
@@ -78,8 +80,8 @@ def test_repeated_eigvals():
     F = (np.eye(3).ravel()).reshape(3, 3, 1, 1)
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
@@ -89,8 +91,8 @@ def test_repeated_eigvals():
     F[2, 2] = 2
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
4 changes: 2 additions & 2 deletions tests/test_scalar.py
@@ -19,12 +19,12 @@ def test_scalar():
     with pytest.raises(TypeError):
         tr.hessian(fun, wrt=[1, 2])(x, y)
 
-    h, g, f = tr.hessian(fun, wrt=0, ntrax=1)(x, y)
+    h, g, f = tr.hessian(fun, wrt=0, ntrax=1, full_output=True)(x, y)
 
     assert np.allclose(g, 2 * x / y + np.log(y))
     assert np.allclose(h, 2 / y)
 
-    h, g, f = tr.hessian(fun, wrt="y", ntrax=1)(x=x, y=y)
+    h, g, f = tr.hessian(fun, wrt="y", ntrax=1, full_output=True)(x=x, y=y)
 
     assert np.allclose(g, -(x**2) / y**2 + x / y)
     assert np.allclose(h, 2 * x**2 / y**3 - x / y**2)
