Merge pull request #19 from adtzlr/change-full-output-hessian
Add and change default `tr.hessian(fun, full_output=False)`
adtzlr authored Dec 10, 2022
2 parents 0fb28f1 + 8819b4d commit 6d1e96c
Showing 7 changed files with 47 additions and 35 deletions.
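
In effect, `tr.gradient` and `tr.hessian` now return only the highest derivative by default; the previous tuple-style return (hessian, gradient, function value) is opt-in via the new `full_output=True` argument. A minimal sketch of the two calling conventions (assuming `trace` is importable from `tensortrax.math`, matching the `trace` used in the README snippets below):

```python
import numpy as np
import tensortrax as tr
from tensortrax.math import trace  # assumed import path for the README's `trace`

fun = lambda F: trace(F.T() @ F)
x = np.eye(3)

# new default (this PR): only the hessian is returned
hess = tr.hessian(fun)(x)

# previous behaviour, now opt-in
hess, grad, func = tr.hessian(fun, full_output=True)(x)
grad, func = tr.gradient(fun, full_output=True)(x)
```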
20 changes: 12 additions & 8 deletions README.md
@@ -44,7 +44,7 @@ def fun(F, mu=1):
     return mu / 2 * (J ** (-2 / 3) * I1 - 3)
 ```
 
-The hessian of the scalar-valued function w.r.t. the chosen function argument (here, `wrt=0` or `wrt="F"`) is evaluated by variational calculus (Forward Mode AD implemented as Hyper-Dual Tensors). The function is called once for each component of the hessian (symmetry is taken care of). The function and the gradient are evaluated with no additional computational cost.
+The hessian of the scalar-valued function w.r.t. the chosen function argument (here, `wrt=0` or `wrt="F"`) is evaluated by variational calculus (Forward Mode AD implemented as Hyper-Dual Tensors). The function is called once for each component of the hessian (symmetry is taken care of). The function and the gradient are evaluated with no additional computational cost. Optionally, the function calls are executed in parallel (threaded).
 
 ```python
 import numpy as np
@@ -56,8 +56,9 @@ for a in range(3):
     F[a, a] += 1
 
 # W = tr.function(fun, wrt=0, ntrax=2)(F)
-# dWdF, W = tr.gradient(fun, wrt=0, ntrax=2)(F)
-d2WdF2, dWdF, W = tr.hessian(fun, wrt="F", ntrax=2)(F=F)
+# dWdF = tr.gradient(fun, wrt=0, ntrax=2)(F)
+# d2WdF2, dWdF, W = tr.hessian(fun, wrt="F", ntrax=2, full_output=True)(F=F)
+d2WdF2 = tr.hessian(fun, wrt="F", ntrax=2, parallel=False)(F=F)
 ```
 
 # Theory
@@ -152,18 +153,21 @@ P_12 = δ(I1_C) # (= Δ(I1_C))
 A_1223 = Δδ(I1_C)
 ```
 
-To obtain full gradients and hessians in one function call, `tensortrax` provides helpers (decorators) which handle the multiple function calls. Optionally, the function calls are executed in parallel (threaded).
+To obtain full gradients and hessians in one function call, `tensortrax` provides helpers (decorators) which handle the multiple function calls.
 
 ```python
-grad, func = tr.gradient(lambda F: trace(F.T() @ F), wrt=0, ntrax=0, parallel=False)(x)
-hess, grad, func = tr.hessian(lambda F: trace(F.T() @ F))(x)
+fun = lambda F: trace(F.T() @ F)
+
+func = tr.function(fun)(x)
+grad = tr.gradient(fun)(x)
+hess = tr.hessian(fun)(x)
 ```
 
 Evaluate the gradient- as well as the hessian-vector-product:
 
 ```python
-gvp = tr.gradient_vector_product(lambda F: trace(F.T() @ F))(x, δx=x)
-hvp = tr.hessian_vector_product(lambda F: trace(F.T() @ F))(x, δx=x, Δx=x)
+gvp = tr.gradient_vector_product(fun)(x, δx=x)
+hvp = tr.hessian_vector_product(fun)(x, δx=x, Δx=x)
 ```
 
 # Extensions
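The README paragraph changed above states that the function is called once for each component of the hessian, with symmetry taken care of. As a rough illustration of that component-pair loop (a sketch only, not `tensortrax`'s implementation: hyper-dual tensors obtain the exact second derivative from a single evaluation per pair, whereas this sketch approximates it with four central-difference evaluations):

```python
import numpy as np

def hessian_fd(fun, x, h=1e-4):
    # Build the hessian one component pair (a, b) at a time and mirror the
    # result to (b, a) -- only the upper triangle is ever evaluated.
    n = x.size
    v = x.ravel().astype(float)
    f = lambda w: fun(w.reshape(x.shape))
    H = np.zeros((n, n))
    for a in range(n):
        for b in range(a, n):
            ea = np.zeros(n)
            eb = np.zeros(n)
            ea[a] = h
            eb[b] = h
            # central-difference stand-in for the exact hyper-dual evaluation
            d2 = (f(v + ea + eb) - f(v + ea - eb)
                  - f(v - ea + eb) + f(v - ea - eb)) / (4.0 * h * h)
            H[a, b] = H[b, a] = d2
    return H.reshape(*x.shape, *x.shape)

W = lambda F: np.trace(F.T @ F)  # d2W/dF2 is constant: 2 on the diagonal
print(hessian_fd(W, np.eye(3)).shape)  # (3, 3, 3, 3)
```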
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "tensortrax"
-version = "0.1.9"
+version = "0.2.0"
 description = "Math on (Hyper-Dual) Tensors with Trailing Axes"
 readme = "README.md"
 requires-python = ">=3.7"
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = tensortrax
-version = 0.1.9
+version = 0.2.0
 author = Andreas Dutzler
 author_email = a.dutzler@gmail.com
 description = Math on (Hyper-Dual) Tensors with Trailing Axes
22 changes: 14 additions & 8 deletions tensortrax/_evaluate.py
@@ -53,7 +53,7 @@ def evaluate_function(*args, **kwargs):
     return evaluate_function
 
 
-def gradient(fun, wrt=0, ntrax=0, parallel=False):
+def gradient(fun, wrt=0, ntrax=0, parallel=False, full_output=False):
     "Evaluate the gradient of a scalar-valued function."
 
     def evaluate_gradient(*args, **kwargs):
@@ -88,12 +88,15 @@ def kernel(a, wrt, δx, Δx, ntrax, args, kwargs):
         for th in threads:
             th.join()
 
-        return np.array(dfdx).reshape(*t.shape, *t.trax), fx[0]
+        if full_output:
+            return np.array(dfdx).reshape(*t.shape, *t.trax), fx[0]
+        else:
+            return np.array(dfdx).reshape(*t.shape, *t.trax)
 
     return evaluate_gradient
 
 
-def hessian(fun, wrt=0, ntrax=0, parallel=False):
+def hessian(fun, wrt=0, ntrax=0, parallel=False, full_output=False):
     "Evaluate the hessian of a scalar-valued function."
 
     def evaluate_hessian(*args, **kwargs):
@@ -130,11 +133,14 @@ def kernel(a, b, wrt, δx, Δx, ntrax, args, kwargs):
         for th in threads:
             th.join()
 
-        return (
-            np.array(d2fdx2).reshape(*t.shape, *t.shape, *t.trax),
-            np.array(dfdx).reshape(*t.shape, *t.trax),
-            fx[0],
-        )
+        if full_output:
+            return (
+                np.array(d2fdx2).reshape(*t.shape, *t.shape, *t.trax),
+                np.array(dfdx).reshape(*t.shape, *t.trax),
+                fx[0],
+            )
+        else:
+            return np.array(d2fdx2).reshape(*t.shape, *t.shape, *t.trax)
 
     return evaluate_hessian
 
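The threaded path visible above (`threads`, `th.join()`) dispatches one kernel call per hessian component and joins all threads before assembling the result array; since the kernels are NumPy-heavy, threads can presumably overlap where NumPy releases the GIL. A generic sketch of that dispatch pattern (the name `run_kernels` is hypothetical, not `tensortrax` API):

```python
import threading

def run_kernels(kernel, indices, parallel=False):
    # One kernel call per index; optionally each call runs on its own thread.
    results = [None] * len(indices)

    def task(slot, idx):
        results[slot] = kernel(idx)

    if parallel:
        threads = [
            threading.Thread(target=task, args=(slot, idx))
            for slot, idx in enumerate(indices)
        ]
        for th in threads:
            th.start()
        for th in threads:
            th.join()
    else:
        for slot, idx in enumerate(indices):
            task(slot, idx)
    return results

# usage: a toy kernel over the six upper-triangle pairs of a 3x3 argument
pairs = [(a, b) for a in range(3) for b in range(a, 3)]
out = run_kernels(lambda ab: ab[0] + ab[1], pairs, parallel=True)
```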
6 changes: 3 additions & 3 deletions tensortrax/_tensor.py
@@ -171,17 +171,17 @@ def __setitem__(self, key, value):
         if self.δx[key].shape != δ(value).shape:
             self.δx = np.tile(
                 self.δx.reshape(*self.shape, *np.ones(len(self.trax), dtype=int)),
-                (*np.ones(len(self.shape), dtype=int), *self.trax)
+                (*np.ones(len(self.shape), dtype=int), *self.trax),
             )
         if self.Δx[key].shape != Δ(value).shape:
             self.Δx = np.tile(
                 self.Δx.reshape(*self.shape, *np.ones(len(self.trax), dtype=int)),
-                (*np.ones(len(self.shape), dtype=int), *self.trax)
+                (*np.ones(len(self.shape), dtype=int), *self.trax),
             )
         if self.Δδx[key].shape != Δδ(value).shape:
             self.Δδx = np.tile(
                 self.Δδx.reshape(*self.shape, *np.ones(len(self.trax), dtype=int)),
-                (*np.ones(len(self.shape), dtype=int), *self.trax)
+                (*np.ones(len(self.shape), dtype=int), *self.trax),
             )
         self.δx[key] = δ(value)
         self.Δx[key] = Δ(value)
26 changes: 14 additions & 12 deletions tests/test_hessian.py
@@ -30,8 +30,10 @@ def test_function_gradient_hessian():
     for parallel in [False, True]:
         for fun in [neo_hooke, ogden]:
             ww = tr.function(fun, ntrax=2, parallel=parallel)(F)
-            dwdf, w = tr.gradient(fun, ntrax=2, parallel=parallel)(F)
-            d2WdF2, dWdF, W = tr.hessian(fun, ntrax=2, parallel=parallel)(F)
+            dwdf, w = tr.gradient(fun, ntrax=2, parallel=parallel, full_output=True)(F)
+            d2WdF2, dWdF, W = tr.hessian(
+                fun, ntrax=2, parallel=parallel, full_output=True
+            )(F)
 
             assert W.shape == (1, 1)
             assert dWdF.shape == (3, 3, 1, 1)
@@ -48,17 +50,17 @@ def test_trig():

     for fun in [trig]:
         ww = tr.function(fun)(F)
-        dwdf, w = tr.gradient(fun)(F)
-        d2WdF2, dWdF, W = tr.hessian(fun)(F)
+        dwdf = tr.gradient(fun, full_output=False)(F)
+        d2WdF2 = tr.hessian(fun, full_output=False)(F)
 
 
 def test_repeated_eigvals():
 
     F = np.eye(3)
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
@@ -68,8 +70,8 @@ def test_repeated_eigvals():
     F[2, 2] = 2
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
@@ -78,8 +80,8 @@ def test_repeated_eigvals():
     F = (np.eye(3).ravel()).reshape(3, 3, 1, 1)
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
@@ -89,8 +91,8 @@ def test_repeated_eigvals():
     F[2, 2] = 2
 
     ntrax = len(F.shape) - 2
-    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax)(F)
-    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax)(F)
+    d2WdF2, dWdF, W = tr.hessian(ogden, ntrax=ntrax, full_output=True)(F)
+    d2wdf2, dwdf, w = tr.hessian(neo_hooke, ntrax=ntrax, full_output=True)(F)
 
     assert np.allclose(w, W)
     assert np.allclose(dwdf, dWdF)
4 changes: 2 additions & 2 deletions tests/test_scalar.py
@@ -19,12 +19,12 @@ def test_scalar():
     with pytest.raises(TypeError):
         tr.hessian(fun, wrt=[1, 2])(x, y)
 
-    h, g, f = tr.hessian(fun, wrt=0, ntrax=1)(x, y)
+    h, g, f = tr.hessian(fun, wrt=0, ntrax=1, full_output=True)(x, y)
 
     assert np.allclose(g, 2 * x / y + np.log(y))
     assert np.allclose(h, 2 / y)
 
-    h, g, f = tr.hessian(fun, wrt="y", ntrax=1)(x=x, y=y)
+    h, g, f = tr.hessian(fun, wrt="y", ntrax=1, full_output=True)(x=x, y=y)
 
     assert np.allclose(g, -(x**2) / y**2 + x / y)
     assert np.allclose(h, 2 * x**2 / y**3 - x / y**2)
