Skip to content

Commit

Permalink
Merge pull request #204 from alexhernandezgarcia/cube-sep23-allornone…
Browse files Browse the repository at this point in the history
…-oldtest

Cube working decently fi.na.lly
  • Loading branch information
alexhernandezgarcia authored Oct 20, 2023
2 parents f2f229b + e041556 commit a106516
Show file tree
Hide file tree
Showing 22 changed files with 3,207 additions and 589 deletions.
4 changes: 2 additions & 2 deletions config/env/alaninedipeptide.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ length_traj: 10
vonmises_min_concentration: 1e-3
# Parameters of the fixed policy output distribution
n_comp: 3
fixed_distribution:
fixed_distr_params:
vonmises_mean: 0.0
vonmises_concentration: 0.5
# Parameters of the random policy output distribution
random_distribution:
random_distr_params:
vonmises_mean: 0.0
vonmises_concentration: 0.001
# Buffer
Expand Down
39 changes: 39 additions & 0 deletions config/env/ccube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
defaults:
- base

_target_: gflownet.envs.cube.ContinuousCube

id: ccube
continuous: True
func: corners
# Dimensions of hypercube
n_dim: 2
# Constant to restrict interval of test sets
kappa: 1e-3
# Policy
min_incr: 0.1
n_comp: 1
epsilon: 1e-6
beta_params_min: 0.1
beta_params_max: 100.0
fixed_distr_params:
beta_weights: 1.0
beta_alpha: 10.0
beta_beta: 10.0
bernoulli_bts_prob: 0.1
bernoulli_eos_prob: 0.1
random_distr_params:
beta_weights: 1.0
beta_alpha: 10.0
beta_beta: 10.0
bernoulli_bts_prob: 0.1
bernoulli_eos_prob: 0.1
# Buffer
buffer:
data_path: null
train: null
test:
type: grid
n: 900
output_csv: ccube_test.csv
output_pkl: ccube_test.pkl
4 changes: 2 additions & 2 deletions config/env/ctorus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ length_traj: 3
vonmises_min_concentration: 1e-3
# Parameters of the fixed policy output distribution
n_comp: 3
fixed_distribution:
fixed_distr_params:
vonmises_mean: 0.0
vonmises_concentration: 1.0
# Parameters of the random policy output distribution
random_distribution:
random_distr_params:
vonmises_mean: 0.0
vonmises_concentration: 0.01
# Buffer
Expand Down
4 changes: 2 additions & 2 deletions config/env/htorus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ policy_encoding_dim_per_angle: null
length_traj: 3
vonmises_min_concentration: 1e-3
# Parameters of the fixed policy output distribution
fixed_distribution:
fixed_distr_params:
vonmises_mean: 0.0
vonmises_concentration: 0.5
# Parameters of the random policy output distribution
random_distribution:
random_distr_params:
vonmises_mean: 0.0
vonmises_concentration: 0.001
# Buffer
Expand Down
19 changes: 0 additions & 19 deletions config/env/plane.yaml

This file was deleted.

4 changes: 2 additions & 2 deletions config/env/tree.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ test_args:
threshold_components: 3
beta_params_min: 0.1
beta_params_max: 100.0
fixed_distribution:
fixed_distr_params:
beta_alpha: 2.0
beta_beta: 5.0
random_distribution:
random_distr_params:
beta_alpha: 1.0
beta_beta: 1.0
# Buffer
Expand Down
73 changes: 73 additions & 0 deletions config/experiments/ccube/corners.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# @package _global_
# A configuration that works well with the corners proxy.
# wandb: https://wandb.ai/alexhg/cube/runs/9u2d3zzh

defaults:
- override /env: ccube
- override /gflownet: trajectorybalance
- override /proxy: corners
- override /logger: wandb
- override /user: alex

# Environment
env:
n_comp: 5
n_dim: 2
beta_params_min: 0.1
beta_params_max: 100.0
min_incr: 0.1
fixed_distr_params:
beta_weights: 1.0
beta_alpha: 10.0
beta_beta: 10.0
bernoulli_eos_prob: 0.1
bernoulli_bts_prob: 0.1
random_distr_params:
beta_weights: 1.0
beta_alpha: 10.0
beta_beta: 10.0
bernoulli_eos_prob: 0.1
bernoulli_bts_prob: 0.1
reward_func: identity

# GFlowNet hyperparameters
gflownet:
random_action_prob: 0.1
optimizer:
batch_size:
forward: 100
lr: 0.0001
z_dim: 16
lr_z_mult: 100
n_train_steps: 10000
policy:
forward:
type: mlp
n_hid: 512
n_layers: 5
checkpoint: forward
backward:
type: mlp
n_hid: 512
n_layers: 5
shared_weights: False
checkpoint: backward

# WandB
logger:
lightweight: True
project_name: "cube"
tags:
- gflownet
- continuous
- ccube
test:
period: 500
n: 1000
checkpoints:
period: 500

# Hydra
hydra:
run:
dir: ${user.logdir.root}/debug/ccube/${now:%Y-%m-%d_%H-%M-%S}
73 changes: 73 additions & 0 deletions config/experiments/ccube/uniform.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# @package _global_
# A configuration that works well with the uniform proxy.
# wandb: https://wandb.ai/alexhg/cube/runs/1du9iyr5

defaults:
- override /env: ccube
- override /gflownet: trajectorybalance
- override /proxy: uniform
- override /logger: wandb
- override /user: alex

# Environment
env:
n_comp: 2
n_dim: 2
beta_params_min: 0.1
beta_params_max: 100.0
min_incr: 0.1
fixed_distr_params:
beta_weights: 1.0
beta_alpha: 10.0
beta_beta: 10.0
bernoulli_eos_prob: 0.1
bernoulli_bts_prob: 0.1
random_distr_params:
beta_weights: 1.0
beta_alpha: 10.0
beta_beta: 10.0
bernoulli_eos_prob: 0.1
bernoulli_bts_prob: 0.1
reward_func: identity

# GFlowNet hyperparameters
gflownet:
random_action_prob: 0.1
optimizer:
batch_size:
forward: 100
lr: 0.0001
z_dim: 16
lr_z_mult: 100
n_train_steps: 10000
policy:
forward:
type: mlp
n_hid: 256
n_layers: 3
checkpoint: forward
backward:
type: mlp
n_hid: 256
n_layers: 3
shared_weights: False
checkpoint: backward

# WandB
logger:
lightweight: True
project_name: "cube"
tags:
- gflownet
- continuous
- ccube
test:
period: 500
n: 1000
checkpoints:
period: 500

# Hydra
hydra:
run:
dir: ${user.logdir.root}/debug/ccube/${now:%Y-%m-%d_%H-%M-%S}
2 changes: 2 additions & 0 deletions config/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ hydra:
# See: https://hydra.cc/docs/configure_hydra/workdir/
run:
dir: ${user.logdir.root}/${now:%Y-%m-%d_%H-%M-%S}
sweep:
dir: ${user.logdir.root}/multirun/${now:%Y-%m-%d_%H-%M-%S}
job:
# See: https://hydra.cc/docs/upgrades/1.1_to_1.2/changes_to_job_working_dir/
# See: https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory/#disable-changing-current-working-dir-to-jobs-output-dir
Expand Down
49 changes: 38 additions & 11 deletions gflownet/envs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ def __init__(
proxy=None,
oracle=None,
proxy_state_format: str = "oracle",
fixed_distr_params: Optional[dict] = None,
random_distr_params: Optional[dict] = None,
skip_mask_check: bool = False,
fixed_distribution: Optional[dict] = None,
random_distribution: Optional[dict] = None,
conditional: bool = False,
continuous: bool = False,
**kwargs,
Expand Down Expand Up @@ -93,8 +93,10 @@ def __init__(
# Max trajectory length
self.max_traj_length = self.get_max_traj_length()
# Policy outputs
self.fixed_policy_output = self.get_policy_output(fixed_distribution)
self.random_policy_output = self.get_policy_output(random_distribution)
self.fixed_distr_params = fixed_distr_params
self.random_distr_params = random_distr_params
self.fixed_policy_output = self.get_policy_output(self.fixed_distr_params)
self.random_policy_output = self.get_policy_output(self.random_distr_params)
self.policy_output_dim = len(self.fixed_policy_output)
self.policy_input_dim = len(self.state2policy())
if proxy is not None and self.proxy == self.oracle:
Expand Down Expand Up @@ -502,21 +504,44 @@ def sample_actions_batch(
def get_logprobs(
self,
policy_outputs: TensorType["n_states", "policy_output_dim"],
is_forward: bool,
actions: TensorType["n_states", "actions_dim"],
states_target: TensorType["n_states", "policy_input_dim"],
mask_invalid_actions: TensorType["batch_size", "policy_output_dim"] = None,
mask: TensorType["batch_size", "policy_output_dim"] = None,
states_from: Optional[List] = None,
is_backward: bool = False,
) -> TensorType["batch_size"]:
"""
Computes log probabilities of actions given policy outputs and actions. This
implementation is generally valid for all discrete environments but continuous
environments will likely have to implement their own.
Args
----
policy_outputs : tensor
The output of the GFlowNet policy model.
mask : tensor
The mask of invalid actions. For continuous or mixed environments, the mask
may be a tensor with an arbitrary length containing information about special
states, as defined elsewhere in the environment.
actions : tensor
The actions from each state in the batch for which to compute the log
probability.
states_from : tensor
The states originating the actions, in GFlowNet format. Ignored in discrete
environments and only required in certain continuous environments.
is_backward : bool
True if the actions are backward, False if the actions are forward
(default). Ignored in discrete environments and only required in certain
continuous environments.
"""
device = policy_outputs.device
ns_range = torch.arange(policy_outputs.shape[0]).to(device)
logits = policy_outputs
if mask_invalid_actions is not None:
logits[mask_invalid_actions] = -torch.inf
if mask is not None:
logits[mask] = -torch.inf
action_indices = (
torch.tensor(
[self.action_space.index(tuple(action.tolist())) for action in actions]
Expand Down Expand Up @@ -941,10 +966,12 @@ def isclose(state_x, state_y, atol=1e-8):
y_nan = torch.isnan(state_y)
if not torch.equal(x_nan, y_nan):
return False
return torch.all(torch.isclose(state_x[~x_nan], state_y[~y_nan], atol))
return torch.all(
torch.isclose(state_x[~x_nan], state_y[~y_nan], atol=atol)
)
return torch.equal(state_x, state_y)
else:
return np.all(np.isclose(state_x, state_y, atol))
return np.all(np.isclose(state_x, state_y, atol=atol))

def set_energies_stats(self, energies_stats):
self.energies_stats = energies_stats
Expand Down
Loading

0 comments on commit a106516

Please sign in to comment.