Merge pull request #116 from ai4co/v0.3.2

V0.3.2
ai4co · Feb 21, 2024 · 83b8550 · 83b8550
2 parents 75eba9c + 7be225d
commit 83b8550
Show file tree

Hide file tree

Showing 20 changed files with 102 additions and 110 deletions.
diff --git a/notebooks/tutorials/2-creating-new-env-model.ipynb b/notebooks/tutorials/2-creating-new-env-model.ipynb
@@ -290,8 +290,8 @@
     "    \"\"\"Make the observation and action specs from the parameters\"\"\"\n",
     "    self.observation_spec = CompositeSpec(\n",
     "        locs=BoundedTensorSpec(\n",
-    "            minimum=self.min_loc,\n",
-    "            maximum=self.max_loc,\n",
+    "            low=self.min_loc,\n",
+    "            high=self.max_loc,\n",
     "            shape=(self.num_loc, 2),\n",
     "            dtype=torch.float32,\n",
     "        ),\n",
@@ -316,8 +316,8 @@
     "    self.action_spec = BoundedTensorSpec(\n",
     "        shape=(1,),\n",
     "        dtype=torch.int64,\n",
-    "        minimum=0,\n",
-    "        maximum=self.num_loc,\n",
+    "        low=0,\n",
+    "        high=self.num_loc,\n",
     "    )\n",
     "    self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))\n",
     "    self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)"

diff --git a/rl4co/__init__.py b/rl4co/__init__.py
@@ -1 +1 @@
-__version__ = "0.3.1"
+__version__ = "0.3.2"
diff --git a/rl4co/envs/common/base.py b/rl4co/envs/common/base.py
@@ -14,7 +14,11 @@
 
 
 class RL4COEnvBase(EnvBase):
-    """Base class for RL4CO environments based on TorchRL EnvBase
+    """Base class for RL4CO environments based on TorchRL EnvBase.
+    The environment has the usual methods for stepping, resetting, and getting the specifications of the environment
+    that should be implemented by the subclasses of this class.
+    It also has methods for getting the reward, action mask, and checking the validity of the solution, and
+    for generating and loading the datasets (supporting multiple dataloaders as well for validation and testing).
 
     Args:
         data_dir: Root directory for the dataset
@@ -27,6 +31,9 @@ class RL4COEnvBase(EnvBase):
         dataset_cls: Dataset class to use for the environment (which can influence performance)
         seed: Seed for the environment
         device: Device to use. Generally, no need to set as tensors are updated on the fly
+        batch_size: Batch size to use for the environment. Generally, no need to set as tensors are updated on the fly
+        run_type_checks: If True, run type checks on the TensorDicts at each step
+        allow_done_after_reset: If True, an environment can be done after a reset
         _torchrl_mode: Whether to use the TorchRL mode (see :meth:`step` for more details)
     """
 
@@ -45,10 +52,24 @@ def __init__(
         dataset_cls: callable = TensorDictDataset,
         seed: int = None,
         device: str = "cpu",
+        batch_size: torch.Size = None,
+        run_type_checks: bool = False,
+        allow_done_after_reset: bool = False,
         _torchrl_mode: bool = False,
         **kwargs,
     ):
-        super().__init__(device=device, batch_size=[])
+        super().__init__(
+            device=device,
+            batch_size=batch_size,
+            run_type_checks=run_type_checks,
+            allow_done_after_reset=allow_done_after_reset,
+        )
+        # if any kwargs are left, we want to warn the user
+        if kwargs:
+            log.warning(
+                f"Unused keyword arguments: {', '.join(kwargs.keys())}. "
+                "Please check the documentation for the correct keyword arguments"
+            )
         self.data_dir = data_dir
         self.train_file = pjoin(data_dir, train_file) if train_file is not None else None
         self._torchrl_mode = _torchrl_mode

diff --git a/rl4co/envs/eda/dpp.py b/rl4co/envs/eda/dpp.py
@@ -142,8 +142,8 @@ def _make_spec(self, td_params):
         """Make the observation and action specs from the parameters"""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.size**2, 2),
                 dtype=torch.float32,
             ),
@@ -168,8 +168,8 @@ def _make_spec(self, td_params):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.size**2,
+            low=0,
+            high=self.size**2,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/eda/mdpp.py b/rl4co/envs/eda/mdpp.py
@@ -76,8 +76,8 @@ def _make_spec(self, td_params):
         """Make the observation and action specs from the parameters"""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.size**2, 2),
                 dtype=torch.float32,
             ),
@@ -102,8 +102,8 @@ def _make_spec(self, td_params):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.size**2,
+            low=0,
+            high=self.size**2,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/routing/atsp.py b/rl4co/envs/routing/atsp.py
@@ -114,8 +114,8 @@ def _reset(self, td: Optional[TensorDict] = None, batch_size=None) -> TensorDict
     def _make_spec(self, td_params: TensorDict = None):
         self.observation_spec = CompositeSpec(
             cost_matrix=BoundedTensorSpec(
-                minimum=self.min_dist,
-                maximum=self.max_dist,
+                low=self.min_dist,
+                high=self.max_dist,
                 shape=(self.num_loc, self.num_loc),
                 dtype=torch.float32,
             ),
@@ -140,8 +140,8 @@ def _make_spec(self, td_params: TensorDict = None):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc,
+            low=0,
+            high=self.num_loc,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/routing/cvrp.py b/rl4co/envs/routing/cvrp.py
@@ -260,8 +260,8 @@ def _make_spec(self, td_params: TensorDict):
         """Make the observation and action specs from the parameters."""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.num_loc + 1, 2),
                 dtype=torch.float32,
             ),
@@ -270,8 +270,8 @@ def _make_spec(self, td_params: TensorDict):
                 dtype=torch.int64,
             ),
             demand=BoundedTensorSpec(
-                minimum=-self.capacity,
-                maximum=self.max_demand,
+                low=-self.capacity,
+                high=self.max_demand,
                 shape=(self.num_loc, 1),  # demand is only for customers
                 dtype=torch.float32,
             ),
@@ -284,8 +284,8 @@ def _make_spec(self, td_params: TensorDict):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc + 1,
+            low=0,
+            high=self.num_loc + 1,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/routing/mpdp.py b/rl4co/envs/routing/mpdp.py
@@ -337,8 +337,8 @@ def _make_spec(self, td_params: TensorDict):
         max_nodes = self.num_loc + self.max_num_agents + 1
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(max_nodes, 2),
                 dtype=torch.float32,
             ),
@@ -363,8 +363,8 @@ def _make_spec(self, td_params: TensorDict):
                 dtype=torch.float32,
             ),
             cur_coord=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(2,),
                 dtype=torch.float32,
             ),
@@ -417,8 +417,8 @@ def _make_spec(self, td_params: TensorDict):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=max_nodes,
+            low=0,
+            high=max_nodes,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/routing/mtsp.py b/rl4co/envs/routing/mtsp.py
@@ -171,8 +171,8 @@ def _make_spec(self, td_params: TensorDict):
         """Make the observation and action specs from the parameters."""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.num_loc, 2),
                 dtype=torch.float32,
             ),
@@ -213,8 +213,8 @@ def _make_spec(self, td_params: TensorDict):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc,
+            low=0,
+            high=self.num_loc,
         )
         self.reward_spec = UnboundedContinuousTensorSpec()
         self.done_spec = UnboundedDiscreteTensorSpec(dtype=torch.bool)

diff --git a/rl4co/envs/routing/op.py b/rl4co/envs/routing/op.py
@@ -273,8 +273,8 @@ def _make_spec(self, td_params: TensorDict):
         """Make the observation and action specs from the parameters."""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.num_loc + 1, 2),
                 dtype=torch.float32,
             ),
@@ -307,8 +307,8 @@ def _make_spec(self, td_params: TensorDict):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc + 1,
+            low=0,
+            high=self.num_loc + 1,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/routing/pctsp.py b/rl4co/envs/routing/pctsp.py
@@ -271,8 +271,8 @@ def _make_spec(self, td_params: TensorDict):
         """Make the locs and action specs from the parameters."""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.num_loc, 2),
                 dtype=torch.float32,
             ),
@@ -321,8 +321,8 @@ def _make_spec(self, td_params: TensorDict):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc,
+            low=0,
+            high=self.num_loc,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/routing/pdp.py b/rl4co/envs/routing/pdp.py
@@ -144,8 +144,8 @@ def _make_spec(self, td_params: TensorDict):
         """Make the observation and action specs from the parameters."""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.num_loc + 1, 2),
                 dtype=torch.float32,
             ),
@@ -170,8 +170,8 @@ def _make_spec(self, td_params: TensorDict):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc + 1,
+            low=0,
+            high=self.num_loc + 1,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)

diff --git a/rl4co/envs/routing/sdvrp.py b/rl4co/envs/routing/sdvrp.py
@@ -176,8 +176,8 @@ def _make_spec(self, td_params: TensorDict):
         """Make the observation and action specs from the parameters."""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.num_loc + 1, 2),
                 dtype=torch.float32,
             ),
@@ -186,20 +186,20 @@ def _make_spec(self, td_params: TensorDict):
                 dtype=torch.int64,
             ),
             demand=BoundedTensorSpec(
-                minimum=self.min_demand,
-                maximum=self.max_demand,
+                low=self.min_demand,
+                high=self.max_demand,
                 shape=(self.num_loc, 1),  # demand is only for customers
                 dtype=torch.float32,
             ),
             demand_with_depot=BoundedTensorSpec(
-                minimum=self.min_demand,
-                maximum=self.max_demand,
+                low=self.min_demand,
+                high=self.max_demand,
                 shape=(self.num_loc + 1, 1),
                 dtype=torch.float32,
             ),
             used_capacity=BoundedTensorSpec(
-                minimum=0,
-                maximum=self.vehicle_capacity,
+                low=0,
+                high=self.vehicle_capacity,
                 shape=(1,),
                 dtype=torch.float32,
             ),
@@ -212,8 +212,8 @@ def _make_spec(self, td_params: TensorDict):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc + 1,
+            low=0,
+            high=self.num_loc + 1,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)
diff --git a/rl4co/envs/routing/tsp.py b/rl4co/envs/routing/tsp.py
@@ -111,8 +111,8 @@ def _make_spec(self, td_params):
         """Make the observation and action specs from the parameters"""
         self.observation_spec = CompositeSpec(
             locs=BoundedTensorSpec(
-                minimum=self.min_loc,
-                maximum=self.max_loc,
+                low=self.min_loc,
+                high=self.max_loc,
                 shape=(self.num_loc, 2),
                 dtype=torch.float32,
             ),
@@ -137,8 +137,8 @@ def _make_spec(self, td_params):
         self.action_spec = BoundedTensorSpec(
             shape=(1,),
             dtype=torch.int64,
-            minimum=0,
-            maximum=self.num_loc,
+            low=0,
+            high=self.num_loc,
         )
         self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
         self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)