Merge pull request #204 from alexhernandezgarcia/cube-sep23-allornone…

…-oldtest Cube working decently fi.na.lly
alexhernandezgarcia · Oct 20, 2023 · a106516 · a106516
2 parents f2f229b + e041556
commit a106516
Show file tree

Hide file tree

Showing 22 changed files with 3,207 additions and 589 deletions.
diff --git a/config/env/alaninedipeptide.yaml b/config/env/alaninedipeptide.yaml
@@ -12,11 +12,11 @@ length_traj: 10
 vonmises_min_concentration: 1e-3
 # Parameters of the fixed policy output distribution
 n_comp: 3
-fixed_distribution:
+fixed_distr_params:
   vonmises_mean: 0.0
   vonmises_concentration: 0.5
 # Parameters of the random policy output distribution
-random_distribution:
+random_distr_params:
   vonmises_mean: 0.0
   vonmises_concentration: 0.001
 # Buffer

diff --git a/config/env/ccube.yaml b/config/env/ccube.yaml
@@ -0,0 +1,39 @@
+defaults:
+  - base
+
+_target_: gflownet.envs.cube.ContinuousCube
+
+id: ccube
+continuous: True
+func: corners
+# Dimensions of hypercube
+n_dim: 2
+# Constant to restrict interval of test sets
+kappa: 1e-3
+# Policy
+min_incr: 0.1
+n_comp: 1
+epsilon: 1e-6
+beta_params_min: 0.1
+beta_params_max: 100.0
+fixed_distr_params:
+  beta_weights: 1.0
+  beta_alpha: 10.0
+  beta_beta: 10.0
+  bernoulli_bts_prob: 0.1
+  bernoulli_eos_prob: 0.1
+random_distr_params:
+  beta_weights: 1.0
+  beta_alpha: 10.0
+  beta_beta: 10.0
+  bernoulli_bts_prob: 0.1
+  bernoulli_eos_prob: 0.1
+# Buffer
+buffer:
+  data_path: null
+  train: null
+  test:
+    type: grid
+    n: 900
+    output_csv: ccube_test.csv
+    output_pkl: ccube_test.pkl
diff --git a/config/env/ctorus.yaml b/config/env/ctorus.yaml
@@ -13,11 +13,11 @@ length_traj: 3
 vonmises_min_concentration: 1e-3
 # Parameters of the fixed policy output distribution
 n_comp: 3
-fixed_distribution:
+fixed_distr_params:
   vonmises_mean: 0.0
   vonmises_concentration: 1.0
 # Parameters of the random policy output distribution
-random_distribution:
+random_distr_params:
   vonmises_mean: 0.0
   vonmises_concentration: 0.01
 # Buffer

diff --git a/config/env/htorus.yaml b/config/env/htorus.yaml
@@ -13,11 +13,11 @@ policy_encoding_dim_per_angle: null
 length_traj: 3
 vonmises_min_concentration: 1e-3
 # Parameters of the fixed policy output distribution
-fixed_distribution:
+fixed_distr_params:
   vonmises_mean: 0.0
   vonmises_concentration: 0.5
 # Parameters of the random policy output distribution
-random_distribution:
+random_distr_params:
   vonmises_mean: 0.0
   vonmises_concentration: 0.001
 # Buffer

diff --git a/config/env/plane.yaml b/config/env/plane.yaml
diff --git a/config/env/tree.yaml b/config/env/tree.yaml
@@ -22,10 +22,10 @@ test_args:
 threshold_components: 3
 beta_params_min: 0.1
 beta_params_max: 100.0
-fixed_distribution:
+fixed_distr_params:
   beta_alpha: 2.0
   beta_beta: 5.0
-random_distribution:
+random_distr_params:
   beta_alpha: 1.0
   beta_beta: 1.0
 # Buffer

diff --git a/config/experiments/ccube/corners.yaml b/config/experiments/ccube/corners.yaml
@@ -0,0 +1,73 @@
+# @package _global_
+# A configuration that works well with the corners proxy.
+# wandb: https://wandb.ai/alexhg/cube/runs/9u2d3zzh
+
+defaults:
+   - override /env: ccube
+   - override /gflownet: trajectorybalance
+   - override /proxy: corners
+   - override /logger: wandb
+   - override /user: alex
+
+# Environment
+env:
+  n_comp: 5
+  n_dim: 2
+  beta_params_min: 0.1
+  beta_params_max: 100.0
+  min_incr: 0.1
+  fixed_distr_params:
+    beta_weights: 1.0
+    beta_alpha: 10.0
+    beta_beta: 10.0
+    bernoulli_eos_prob: 0.1
+    bernoulli_bts_prob: 0.1
+  random_distr_params:
+    beta_weights: 1.0
+    beta_alpha: 10.0
+    beta_beta: 10.0
+    bernoulli_eos_prob: 0.1
+    bernoulli_bts_prob: 0.1
+  reward_func: identity
+
+# GFlowNet hyperparameters
+gflownet:
+  random_action_prob: 0.1
+  optimizer:
+    batch_size:
+      forward: 100
+    lr: 0.0001
+    z_dim: 16
+    lr_z_mult: 100
+    n_train_steps: 10000
+  policy:
+    forward:
+      type: mlp
+      n_hid: 512
+      n_layers: 5
+      checkpoint: forward
+    backward:
+      type: mlp
+      n_hid: 512
+      n_layers: 5
+      shared_weights: False
+      checkpoint: backward
+
+# WandB
+logger:
+  lightweight: True
+  project_name: "cube"
+  tags: 
+    - gflownet
+    - continuous
+    - ccube
+  test:
+    period: 500
+    n: 1000
+  checkpoints:
+    period: 500
+
+# Hydra
+hydra:
+  run:
+    dir: ${user.logdir.root}/debug/ccube/${now:%Y-%m-%d_%H-%M-%S}
diff --git a/config/experiments/ccube/uniform.yaml b/config/experiments/ccube/uniform.yaml
@@ -0,0 +1,73 @@
+# @package _global_
+# A configuration that works well with the uniform proxy.
+# wandb: https://wandb.ai/alexhg/cube/runs/1du9iyr5
+
+defaults:
+   - override /env: ccube
+   - override /gflownet: trajectorybalance
+   - override /proxy: uniform
+   - override /logger: wandb
+   - override /user: alex
+
+# Environment
+env:
+  n_comp: 2
+  n_dim: 2
+  beta_params_min: 0.1
+  beta_params_max: 100.0
+  min_incr: 0.1
+  fixed_distr_params:
+    beta_weights: 1.0
+    beta_alpha: 10.0
+    beta_beta: 10.0
+    bernoulli_eos_prob: 0.1
+    bernoulli_bts_prob: 0.1
+  random_distr_params:
+    beta_weights: 1.0
+    beta_alpha: 10.0
+    beta_beta: 10.0
+    bernoulli_eos_prob: 0.1
+    bernoulli_bts_prob: 0.1
+  reward_func: identity
+
+# GFlowNet hyperparameters
+gflownet:
+  random_action_prob: 0.1
+  optimizer:
+    batch_size:
+      forward: 100
+    lr: 0.0001
+    z_dim: 16
+    lr_z_mult: 100
+    n_train_steps: 10000
+  policy:
+    forward:
+      type: mlp
+      n_hid: 256
+      n_layers: 3
+      checkpoint: forward
+    backward:
+      type: mlp
+      n_hid: 256
+      n_layers: 3
+      shared_weights: False
+      checkpoint: backward
+
+# WandB
+logger:
+  lightweight: True
+  project_name: "cube"
+  tags: 
+    - gflownet
+    - continuous
+    - ccube
+  test:
+    period: 500
+    n: 1000
+  checkpoints:
+    period: 500
+
+# Hydra
+hydra:
+  run:
+    dir: ${user.logdir.root}/debug/ccube/${now:%Y-%m-%d_%H-%M-%S}
diff --git a/config/main.yaml b/config/main.yaml
@@ -21,6 +21,8 @@ hydra:
   # See: https://hydra.cc/docs/configure_hydra/workdir/
   run:
     dir: ${user.logdir.root}/${now:%Y-%m-%d_%H-%M-%S}
+  sweep:
+    dir: ${user.logdir.root}/multirun/${now:%Y-%m-%d_%H-%M-%S}
   job:
     # See: https://hydra.cc/docs/upgrades/1.1_to_1.2/changes_to_job_working_dir/
     # See: https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory/#disable-changing-current-working-dir-to-jobs-output-dir

diff --git a/gflownet/envs/base.py b/gflownet/envs/base.py
@@ -40,9 +40,9 @@ def __init__(
         proxy=None,
         oracle=None,
         proxy_state_format: str = "oracle",
+        fixed_distr_params: Optional[dict] = None,
+        random_distr_params: Optional[dict] = None,
         skip_mask_check: bool = False,
-        fixed_distribution: Optional[dict] = None,
-        random_distribution: Optional[dict] = None,
         conditional: bool = False,
         continuous: bool = False,
         **kwargs,
@@ -93,8 +93,10 @@ def __init__(
         # Max trajectory length
         self.max_traj_length = self.get_max_traj_length()
         # Policy outputs
-        self.fixed_policy_output = self.get_policy_output(fixed_distribution)
-        self.random_policy_output = self.get_policy_output(random_distribution)
+        self.fixed_distr_params = fixed_distr_params
+        self.random_distr_params = random_distr_params
+        self.fixed_policy_output = self.get_policy_output(self.fixed_distr_params)
+        self.random_policy_output = self.get_policy_output(self.random_distr_params)
         self.policy_output_dim = len(self.fixed_policy_output)
         self.policy_input_dim = len(self.state2policy())
         if proxy is not None and self.proxy == self.oracle:
@@ -502,21 +504,44 @@ def sample_actions_batch(
     def get_logprobs(
         self,
         policy_outputs: TensorType["n_states", "policy_output_dim"],
-        is_forward: bool,
         actions: TensorType["n_states", "actions_dim"],
-        states_target: TensorType["n_states", "policy_input_dim"],
-        mask_invalid_actions: TensorType["batch_size", "policy_output_dim"] = None,
+        mask: TensorType["batch_size", "policy_output_dim"] = None,
+        states_from: Optional[List] = None,
+        is_backward: bool = False,
     ) -> TensorType["batch_size"]:
         """
         Computes log probabilities of actions given policy outputs and actions. This
         implementation is generally valid for all discrete environments but continuous
         environments will likely have to implement its own.
+
+        Args
+        ----
+        policy_outputs : tensor
+            The output of the GFlowNet policy model.
+
+        mask : tensor
+            The mask of invalid actions. For continuous or mixed environments, the mask
+            may be a tensor of arbitrary length containing information about special
+            states, as defined elsewhere in the environment.
+
+        actions : tensor
+            The actions from each state in the batch for which to compute the log
+            probability.
+
+        states_from : list, optional
+            The states originating the actions, in GFlowNet format. Ignored in discrete
+            environments and only required in certain continuous environments.
+
+        is_backward : bool
+            True if the actions are backward, False if the actions are forward
+            (default). Ignored in discrete environments and only required in certain
+            continuous environments.
         """
         device = policy_outputs.device
         ns_range = torch.arange(policy_outputs.shape[0]).to(device)
         logits = policy_outputs
-        if mask_invalid_actions is not None:
-            logits[mask_invalid_actions] = -torch.inf
+        if mask is not None:
+            logits[mask] = -torch.inf
         action_indices = (
             torch.tensor(
                 [self.action_space.index(tuple(action.tolist())) for action in actions]
@@ -941,10 +966,12 @@ def isclose(state_x, state_y, atol=1e-8):
                 y_nan = torch.isnan(state_y)
                 if not torch.equal(x_nan, y_nan):
                     return False
-                return torch.all(torch.isclose(state_x[~x_nan], state_y[~y_nan], atol))
+                return torch.all(
+                    torch.isclose(state_x[~x_nan], state_y[~y_nan], atol=atol)
+                )
             return torch.equal(state_x, state_y)
         else:
-            return np.all(np.isclose(state_x, state_y, atol))
+            return np.all(np.isclose(state_x, state_y, atol=atol))
 
     def set_energies_stats(self, energies_stats):
         self.energies_stats = energies_stats