From 05a36546440624d725a8157f331cbe7c15072e6e Mon Sep 17 00:00:00 2001
From: lucidrains
Date: Wed, 27 Sep 2023 11:04:07 -0700
Subject: [PATCH] calculate gamma positions in float64

https://github.com/lucidrains/enformer-pytorch/issues/31
---
 enformer_pytorch/modeling_enformer.py | 14 +++++++++++---
 setup.py                              |  2 +-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/enformer_pytorch/modeling_enformer.py b/enformer_pytorch/modeling_enformer.py
index 8420873..5b060f7 100644
--- a/enformer_pytorch/modeling_enformer.py
+++ b/enformer_pytorch/modeling_enformer.py
@@ -75,14 +75,22 @@ def get_positional_features_gamma(positions, features, seq_len, stddev = None, s
     if not exists(start_mean):
         start_mean = seq_len / features
 
-    mean = torch.linspace(start_mean, seq_len, features, device = positions.device)
+    # turns out xlogy between tensorflow and torch differs because of the log - thanks to phd student @johahi for finding this!
+    # do everything in float64 here for precision
+
+    dtype = positions.dtype
+    positions = positions.double()
+    mean = torch.linspace(start_mean, seq_len, features, device = positions.device, dtype = torch.float64)
+
     mean = mean[None, ...]
     concentration = (mean / stddev) ** 2
     rate = mean / stddev ** 2
-    probabilities = gamma_pdf(positions.float().abs()[..., None], concentration, rate)
+
+    probabilities = gamma_pdf(positions.abs()[..., None], concentration, rate)
     probabilities = probabilities + eps
     outputs = probabilities / torch.amax(probabilities, dim = -1, keepdim = True)
-    return outputs
+
+    return outputs.to(dtype)
 
 def get_positional_embed(seq_len, feature_size, device):
     distances = torch.arange(-seq_len + 1, seq_len, device = device)
diff --git a/setup.py b/setup.py
index 2cec5ad..42277d3 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
   name = 'enformer-pytorch',
   packages = find_packages(exclude=[]),
   include_package_data = True,
-  version = '0.7.6',
+  version = '0.7.7',
   license='MIT',
   description = 'Enformer - Pytorch',
   author = 'Phil Wang',