diff --git a/python/deeplake/__init__.py b/python/deeplake/__init__.py index 5684486de2..538b73c46e 100644 --- a/python/deeplake/__init__.py +++ b/python/deeplake/__init__.py @@ -14,7 +14,7 @@ def progress_bar(iterable, *args, **kwargs): import deeplake from ._deeplake import * -__version__ = "4.1.1" +__version__ = "4.1.2" __all__ = [ "__version__", @@ -128,9 +128,7 @@ def progress_bar(iterable, *args, **kwargs): "types", "Client", "client", - "__child_atfork", "__prepare_atfork", - "__parent_atfork", ] @@ -259,7 +257,7 @@ def transfer_with_links(source, dest, links, column_names): def __register_at_fork(): - from ._deeplake import __prepare_atfork, __parent_atfork, __child_atfork + from ._deeplake import __prepare_atfork UNSAFE_TYPES = ( Dataset, @@ -300,13 +298,13 @@ def check_main_globals_for_unsafe_types(): def before_fork(): check_main_globals_for_unsafe_types() - __prepare_atfork() + pass def after_fork_parent(): - __parent_atfork() + pass def after_fork_child(): - __child_atfork() + pass os.register_at_fork( before=before_fork, @@ -314,5 +312,11 @@ def after_fork_child(): after_in_child=after_fork_child, ) + ff = os.fork + def fork(): + __prepare_atfork() + return ff() + + os.fork = fork __register_at_fork() diff --git a/python/deeplake/__init__.pyi b/python/deeplake/__init__.pyi index 7fb934332a..7ee819eb5b 100644 --- a/python/deeplake/__init__.pyi +++ b/python/deeplake/__init__.pyi @@ -118,304 +118,572 @@ __all__ = [ "types", "Client", "client", - "__child_atfork", "__prepare_atfork", - "__parent_atfork", ] class Future: """ - A future that represents a value that will be resolved in the future. + A future representing an asynchronous operation result in ML pipelines. - Once the Future is resolved, it will hold the result, and you can retrieve it - using either a blocking call (`result()`) or via asynchronous mechanisms (`await`). - - The future will resolve automatically even if you do not explicitly wait for it. + The Future class enables non-blocking operations for data loading and processing, + particularly useful when working with large ML datasets or distributed training. + Once resolved, the Future holds the operation result which can be accessed either + synchronously or asynchronously. Methods: result() -> typing.Any: - Blocks until the Future is resolved and returns the object. + Blocks until the Future resolves and returns the result. __await__() -> typing.Any: - Awaits the future asynchronously and returns the object once it's ready. + Enables using the Future in async/await syntax. is_completed() -> bool: - Returns True if the Future is already resolved, False otherwise. + Checks if the Future has resolved without blocking. + + + Examples: + Loading ML dataset asynchronously: + ```python + future = deeplake.open_async("s3://ml-data/embeddings") + + # Check status without blocking + if not future.is_completed(): + print("Still loading...") + + # Block until ready + ds = future.result() + ``` + + Using with async/await: + ```python + async def load_data(): + ds = await deeplake.open_async("s3://ml-data/images") + batch = await ds.images.get_async(slice(0, 32)) + return batch + ``` """ def result(self) -> typing.Any: """ - Blocks until the Future is resolved, then returns the result. + Blocks until the Future resolves and returns the result. Returns: - typing.Any: The result when the Future is resolved. + typing.Any: The operation result once resolved. 
+ + + + Examples: + ```python + future = ds["images"].get_async(slice(0, 32)) + batch = future.result() # Blocks until batch is loaded + ``` """ ... def __await__(self) -> typing.Any: """ - Awaits the resolution of the Future asynchronously. + Makes the Future compatible with async/await syntax. + + Examples: - ```python - result = await future - ``` + ```python + async def load_batch(): + batch = await ds["images"].get_async(slice(0, 32)) + ``` Returns: - typing.Any: The result when the Future is resolved. + typing.Any: The operation result once resolved. """ ... def is_completed(self) -> bool: """ - Checks if the Future has been resolved. + Checks if the Future has resolved without blocking. Returns: - bool: True if the Future is resolved, False otherwise. + bool: True if resolved, False if still pending. + + + + Examples: + ```python + future = ds.query_async("SELECT * WHERE label = 'car'") + if future.is_completed(): + results = future.result() + else: + print("Query still running...") + ``` """ ... class FutureVoid: """ - A future that represents the completion of an operation that returns no result. - - The future will resolve automatically to `None`, even if you do not explicitly wait for it. + A Future representing a void async operation in ML pipelines. + + Similar to Future but for operations that don't return values, like saving + or committing changes. Useful for non-blocking data management operations. Methods: wait() -> None: - Blocks until the FutureVoid is resolved and then returns `None`. + Blocks until operation completes. __await__() -> None: - Awaits the FutureVoid asynchronously and returns `None` once the operation is complete. + Enables using with async/await syntax. is_completed() -> bool: - Returns True if the FutureVoid is already resolved, False otherwise. + Checks completion status without blocking. + + + + Examples: + Asynchronous dataset updates: + ```python + # Update embeddings without blocking + future = ds["embeddings"].set_async(slice(0, 32), new_embeddings) + + # Do other work while update happens + process_other_data() + + # Wait for update to complete + future.wait() + ``` + + Using with async/await: + ```python + async def update_dataset(): + await ds.commit_async() + print("Changes saved") + ``` """ def wait(self) -> None: """ - Blocks until the FutureVoid is resolved, then returns `None`. + Blocks until the operation completes. + + Examples: ```python - future_void.wait() # Blocks until the operation completes. + future = ds.commit_async() + future.wait() # Blocks until commit finishes ``` - - Returns: - None: Indicates the operation has completed. """ ... def __await__(self) -> None: """ - Awaits the resolution of the FutureVoid asynchronously. + Makes the FutureVoid compatible with async/await syntax. - Examples: - ```python - await future_void # Waits for the completion of the async operation. - ``` + - Returns: - None: Indicates the operation has completed. + Examples: + ```python + async def save_changes(): + await ds.commit_async() + ``` """ ... def is_completed(self) -> bool: """ - Checks if the FutureVoid has been resolved. + Checks if the operation has completed without blocking. Returns: - bool: True if the FutureVoid is resolved, False otherwise. + bool: True if completed, False if still running. + + + + Examples: + ```python + future = ds.commit_async() + if future.is_completed(): + print("Commit finished") + else: + print("Commit still running...") + ``` """ ... class ReadOnlyMetadata: """ - ReadOnlyMetadata is a key-value store. 
+ Read-only access to dataset and column metadata for ML workflows. + + Stores important information about datasets like: + - Model parameters and hyperparameters + - Preprocessing statistics (mean, std, etc.) + - Data splits and fold definitions + - Version and training information + + + + Examples: + Accessing model metadata: + ```python + metadata = ds.metadata + model_name = metadata["model_name"] + model_params = metadata["hyperparameters"] + ``` + + Reading preprocessing stats: + ```python + mean = ds["images"].metadata["mean"] + std = ds["images"].metadata["std"] + ``` """ def __getitem__(self, key: str) -> typing.Any: """ - Get the value for the given key + Gets metadata value for the given key. + + Args: + key: Metadata key to retrieve + + Returns: + The stored metadata value + + + + Examples: + ```python + mean = ds["images"].metadata["mean"] + std = ds["images"].metadata["std"] + ``` """ ... def keys(self) -> list[str]: """ - Return a list of all keys in the metadata + Lists all available metadata keys. + + Returns: + list[str]: List of metadata key names + + + + Examples: + ```python + # Print all metadata + for key in metadata.keys(): + print(f"{key}: {metadata[key]}") + ``` """ ... class Metadata(ReadOnlyMetadata): """ - Metadata is a key-value store. + Writable access to dataset and column metadata for ML workflows. + + Stores important information about datasets like: + - Model parameters and hyperparameters + - Preprocessing statistics + - Data splits and fold definitions + - Version and training information + + Changes are persisted immediately without requiring `commit()`. + + Examples: + Storing model metadata: + + dataset.metadata["model_name"] = "resnet50" + dataset.metadata["hyperparameters"] = { + "learning_rate": 0.001, + "batch_size": 32 + } + + Setting preprocessing stats: + + dataset.images.metadata["mean"] = [0.485, 0.456, 0.406] + dataset.images.metadata["std"] = [0.229, 0.224, 0.225] """ def __setitem__(self, key: str, value: typing.Any) -> None: """ - Set the value for the given key. Setting the value will immediately persist the change without requiring a commit(). + Sets metadata value for given key. Changes are persisted immediately. + + Args: + key: Metadata key to set + value: Value to store + + Examples: + ```python + ds.metadata["train_split"] = 0.8 + ds.metadata["val_split"] = 0.1 + ds.metadata["test_split"] = 0.1 + ``` """ ... def query(query: str, token: str | None = None) -> DatasetView: """ - Executes a TQL (Tensor Query Language) query and returns a filtered DatasetView. + Executes TQL queries optimized for ML data filtering and search. - TQL provides SQL-like querying capabilities specifically designed for ML datasets, allowing you - to filter, sort, and select data based on various criteria including vector similarity. + TQL is a SQL-like query language designed for ML datasets, supporting: + - Vector similarity search + - Text semantic search + - Complex data filtering + - Joining across datasets + - Efficient sorting and pagination Args: - query: A TQL query string. The query can: - - Filter rows using WHERE clauses - - Sort results using ORDER BY - - Select specific columns using SELECT - - Perform vector similarity search using BM25_SIMILARITY - - Join multiple datasets - token: Optional Activeloop token for authentication. Not required if using environment - credentials. 
+ query: TQL query string supporting: + - Vector similarity: COSINE_SIMILARITY, EUCLIDEAN_DISTANCE + - Text search: BM25_SIMILARITY, CONTAINS + - Filtering: WHERE clauses + - Sorting: ORDER BY + - Joins: JOIN across datasets + token: Optional Activeloop authentication token Returns: - DatasetView: A view containing the query results. The view can be: - - Used directly for ML training + DatasetView: Query results that can be: + - Used directly in ML training - Further filtered with additional queries - Converted to PyTorch/TensorFlow dataloaders - Materialized into a new dataset - Examples: - Basic filtering: - ```python - # Select images with high confidence labels - view = deeplake.query(f'SELECT * FROM "{ds_path}" WHERE confidence > 0.9') - - # Get samples from specific classes - cats = deeplake.query(f'SELECT * FROM "{ds_path}" WHERE label IN (\'cat\', \'kitten\')') - ``` + - Text similarity search: + Examples: + Vector similarity search: ```python - # Find semantically similar text using BM25 - similar = deeplake.query(f''' - SELECT * FROM "{ds_path}" - ORDER BY BM25_SIMILARITY(text_column, 'query text') DESC + # Find similar embeddings + similar = deeplake.query(''' + SELECT * FROM "mem://embeddings" + ORDER BY COSINE_SIMILARITY(vector, ARRAY[0.1, 0.2, 0.3]) DESC LIMIT 100 ''') + + # Use results in training + dataloader = similar.pytorch() ``` - Vector similarity search: + Text semantic search: ```python - # Find nearest neighbor embeddings - neighbors = deeplake.query(f''' - SELECT * FROM "{ds_path}" - ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, ...]) DESC + # Search documents using BM25 + relevant = deeplake.query(''' + SELECT * FROM "mem://documents" + ORDER BY BM25_SIMILARITY(text, 'machine learning') DESC LIMIT 10 ''') ``` - Joins across datasets: + Complex filtering: ```python - # Join images with their metadata - results = deeplake.query(f''' - SELECT i.image, m.label, m.bbox - FROM "{image_ds_path}" AS i - JOIN "{metadata_ds_path}" AS m ON i.id = m.image_id - WHERE m.verified = true + # Filter training data + train = deeplake.query(''' + SELECT * FROM "mem://dataset" + WHERE "split" = 'train' + AND confidence > 0.9 + AND label IN ('cat', 'dog') ''') ``` - Using with ML frameworks: + Joins for feature engineering: ```python - # Filter dataset and create PyTorch dataloader - train_data = deeplake.query("SELECT * FROM dataset WHERE split = 'train'") - train_loader = train_data.pytorch().dataloader(batch_size=32) + # Combine image features with metadata + features = deeplake.query(''' + SELECT i.image, i.embedding, m.labels, m.metadata + FROM "mem://images" AS i + JOIN "mem://metadata" AS m ON i.id = m.image_id + WHERE m.verified = true + ''') ``` """ ... def query_async(query: str, token: str | None = None) -> Future: """ - Asynchronously executes a TQL (Tensor Query Language) query and returns a Future that will resolve into DatasetView. + Asynchronously executes TQL queries optimized for ML data filtering and search. - TQL provides SQL-like querying capabilities specifically designed for ML datasets, allowing you - to filter, sort, and select data based on various criteria including vector similarity. + Non-blocking version of `query()` for better performance with large datasets. + Supports the same TQL features including vector similarity search, text search, + filtering, and joins. Args: - query: A TQL query string. 
The query can: - - Filter rows using WHERE clauses - - Sort results using ORDER BY - - Select specific columns using SELECT - - Perform vector similarity search using BM25_SIMILARITY - - Join multiple datasets - token: Optional Activeloop token for authentication. Not required if using environment - credentials. + query: TQL query string supporting: + - Vector similarity: COSINE_SIMILARITY, EUCLIDEAN_DISTANCE + - Text search: BM25_SIMILARITY, CONTAINS + - Filtering: WHERE clauses + - Sorting: ORDER BY + - Joins: JOIN across datasets + token: Optional Activeloop authentication token Returns: - Future: A Future object that resolves to a DatasetView. The resulting view can be: - - Used directly for ML training + Future: Resolves to DatasetView that can be: + - Used directly in ML training - Further filtered with additional queries - Converted to PyTorch/TensorFlow dataloaders - Materialized into a new dataset - Examples: - Basic filtering with await: - ```python - # Select images with high confidence labels - view = await deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE confidence > 0.9') - - # Get samples from specific classes - cats = await deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE label IN (\'cat\', \'kitten\')') - ``` + - Text similarity search with Future.result(): + Examples: + Basic async query: ```python - # Find semantically similar text using BM25 - future = deeplake.query_async(f''' - SELECT * FROM "{ds_path}" - ORDER BY BM25_SIMILARITY(text_column, 'query text') DESC - LIMIT 100 + # Run query asynchronously + future = deeplake.query_async(''' + SELECT * FROM "mem://embeddings" + ORDER BY COSINE_SIMILARITY(vector, ARRAY[0.1, 0.2, 0.3]) DESC ''') - similar = future.result() # Blocks until query completes - ``` - Vector similarity search: - ```python - # Find nearest neighbor embeddings - neighbors = await deeplake.query_async(f''' - SELECT * FROM "{ds_path}" - ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, ...]) DESC - LIMIT 10 - ''') - ``` + # Do other work while query runs + prepare_training() - Joins across datasets: - ```python - # Join images with their metadata - results = await deeplake.query_async(f''' - SELECT i.image, m.label, m.bbox - FROM "{image_ds_path}" AS i - JOIN "{metadata_ds_path}" AS m ON i.id = m.image_id - WHERE m.verified = true - ''') + # Get results when needed + results = future.result() ``` - Using with ML frameworks: + With async/await: ```python - # Filter dataset and create PyTorch dataloader - future = deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE split = \'train\'') - train_data = future.result() - train_loader = train_data.pytorch().dataloader(batch_size=32) + async def search_similar(): + results = await deeplake.query_async(''' + SELECT * FROM "mem://images" + ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, 0.3]) DESC + LIMIT 100 + ''') + return results + + async def main(): + similar = await search_similar() ``` Non-blocking check: ```python - # Check if query is complete without blocking - future = deeplake.query_async(f'SELECT * FROM "{ds_path}"') + future = deeplake.query_async( + "SELECT * FROM dataset WHERE \\"split\\" = 'train'" + ) + if future.is_completed(): - results = future.result() + train_data = future.result() + else: + print("Query still running...") ``` """ ... class Client: + """ + Client for connecting to Activeloop services. + Handles authentication and API communication. 
+ """ endpoint: str class Tag: @@ -635,36 +903,46 @@ class ColumnView: - Access column metadata and properties - Get information about linked data if the column contains references + + Examples: - Load image data from a column for training + Load image data from a column for training: ```python # Access a single image - image = dataset["images"][0] + image = ds["images"][0] # Load a batch of images - batch = dataset["images"][0:32] + batch = ds["images"][0:32] # Async load for better performance - images_future = dataset["images"].get_async(0:32) + images_future = ds["images"].get_async(slice(0, 32)) images = images_future.result() ``` - Access embeddings for similarity search + Access embeddings for similarity search: ```python # Get all embeddings - embeddings = dataset["embeddings"][:] + embeddings = ds["embeddings"][:] # Get specific embeddings by indices - selected = dataset["embeddings"][[1, 5, 10]] + selected = ds["embeddings"][[1, 5, 10]] ``` - Check column properties + Check column properties: ```python # Get column name - name = dataset["images"].name + name = ds["images"].name # Access metadata - if "mean" in dataset["images"].metadata: + if "mean" in ds["images"].metadata.keys(): mean = dataset["images"].metadata["mean"] ``` """ @@ -682,6 +960,16 @@ class ColumnView: Returns: The data at the specified index/indices. Type depends on the column's data type. + + Examples: ```python # Get single item @@ -710,14 +998,26 @@ class ColumnView: Returns: Future: A Future object that resolves to the requested data. + + Examples: ```python # Async batch load - future = column.get_async(0:32) + future = column.get_async(slice(0, 32)) batch = future.result() # Using with async/await - batch = await column.get_async(0:32) + async def load_batch(): + batch = await column.get_async(slice(0, 32)) + return batch ``` """ ... @@ -750,6 +1050,20 @@ class ColumnView: Access the column's metadata. Useful for storing statistics, preprocessing parameters, or other information about the column data. + Returns: + ReadOnlyMetadata: A ReadOnlyMetadata object for reading metadata. + + + Examples: ```python # Access preprocessing parameters @@ -786,32 +1100,49 @@ class Column(ColumnView): - Access and modify column metadata - Handle various data types common in ML: images, embeddings, labels, etc. + + Examples: - Update training labels + Update training labels: ```python # Update single label - dataset["labels"][0] = 1 + ds["labels"][0] = 1 # Update batch of labels - dataset["labels"][0:32] = new_labels + ds["labels"][0:32] = new_labels # Async update for better performance - future = dataset["labels"].set_async(0:32, new_labels) + future = ds["labels"].set_async(slice(0, 32), new_labels) future.wait() ``` - Store image embeddings + Store image embeddings: ```python # Generate and store embeddings embeddings = model.encode(images) - dataset["embeddings"][0:len(embeddings)] = embeddings + ds["embeddings"][0:len(embeddings)] = embeddings ``` - Manage column metadata + Manage column metadata: ```python # Store preprocessing parameters - dataset["images"].metadata["mean"] = [0.485, 0.456, 0.406] - dataset["images"].metadata["std"] = [0.229, 0.224, 0.225] + ds["images"].metadata["mean"] = [0.485, 0.456, 0.406] + ds["images"].metadata["std"] = [0.229, 0.224, 0.225] ``` """ @@ -825,6 +1156,18 @@ class Column(ColumnView): - slice: Range of indices (e.g., 0:10) value: The data to store. Must match the column's data type. 
+ + Examples: ```python # Update single item @@ -850,14 +1193,27 @@ class Column(ColumnView): Returns: FutureVoid: A FutureVoid that completes when the update is finished. + + Examples: ```python # Async batch update - future = column.set_async(0:32, new_batch) + future = column.set_async(slice(0, 32), new_batch) future.wait() # Using with async/await - await column.set_async(0:32, new_batch) + async def update_batch(): + await column.set_async(slice(0, 32), new_batch) ``` """ ... @@ -926,6 +1282,16 @@ class Row: Returns: Future: A Future object that will resolve to the value containing the column data. + + Examples: ```python future = row.get_async("column_name") @@ -955,6 +1321,17 @@ class Row: Returns: FutureVoid: A FutureVoid object that will resolve when the operation is complete. + + Examples: ```python future_void = row.set_async("column_name", new_value) @@ -1004,6 +1381,16 @@ class RowRange: Returns: Future: A Future object that will resolve to the value containing the column data. + + Examples: ```python future = row_range.get_async("column_name") @@ -1033,6 +1420,17 @@ class RowRange: Returns: FutureVoid: A FutureVoid object that will resolve when the operation is complete. + + Examples: ```python future_void = row_range.set_async("column_name", new_value) @@ -1085,6 +1483,16 @@ class RowRangeView: Returns: Future: A Future object that will resolve to the value containing the column data. + + Examples: ```python future = row_range_view.get_async("column_name") @@ -1118,6 +1526,16 @@ class RowView: Returns: Future: A Future object that will resolve to the value containing the column data. + + Examples: ```python future = row_view.get_async("column_name") @@ -1221,7 +1639,6 @@ class DatasetView: # process row pass ``` - """ ... @@ -1239,15 +1656,6 @@ class DatasetView: ```python ds.summary() ``` - - Example Output: - ``` - Dataset length: 5 - Columns: - id : int64 - title : text - embedding: embedding(768) - ``` """ ... @@ -1255,13 +1663,20 @@ class DatasetView: """ Executes the given TQL query against the dataset and return the results as a [deeplake.DatasetView][]. + + Examples: ```python result = ds.query("select * where category == 'active'") for row in result: print("Id is: ", row["id"]) ``` - """ ... @@ -1269,6 +1684,14 @@ class DatasetView: """ Asynchronously executes the given TQL query against the dataset and return a future that will resolve into [deeplake.DatasetView][]. + + Examples: ```python future = ds.query_async("select * where category == 'active'") @@ -1276,11 +1699,12 @@ class DatasetView: for row in result: print("Id is: ", row["id"]) - # or use the Future in an await expression - future = ds.query_async("select * where category == 'active'") - result = await future - for row in result: - print("Id is: ", row["id"]) + async def query_and_process(): + # or use the Future in an await expression + future = ds.query_async("select * where category == 'active'") + result = await future + for row in result: + print("Id is: ", row["id"]) ``` """ ... @@ -1304,14 +1728,23 @@ class DatasetView: Raises: ImportError: If TensorFlow is not installed + + Examples: ```python - ds = deeplake.open("path/to/dataset") - dl = ds.tensorflow().shuffle(500).batch(32). - for i_batch, sample_batched in enumerate(dataloader): + dl = ds.tensorflow().shuffle(500).batch(32) + for i_batch, sample_batched in enumerate(dl): process_batch(sample_batched) ``` - """ ... 
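Taken together, the async getters above support a simple double-buffered read pattern: request the next slice with `get_async` before consuming the current one, so storage I/O overlaps with compute. A minimal sketch, not part of this diff: the dataset path and the `process` callback are hypothetical, and it assumes `len(ds)` plus the documented `get_async(slice)` / `Future.result()` calls:

```python
import deeplake

def iterate_prefetched(ds, process, batch_size=32):
    # Double-buffered loop: overlap loading of batch N+1 with work on batch N.
    n = len(ds)
    future = ds["images"].get_async(slice(0, min(batch_size, n)))
    for start in range(batch_size, n, batch_size):
        nxt = ds["images"].get_async(slice(start, min(start + batch_size, n)))
        process(future.result())  # blocks only if the read is still in flight
        future = nxt
    process(future.result())  # drain the last in-flight batch

ds = deeplake.open("file:///tmp/example_ds")  # hypothetical local dataset
iterate_prefetched(ds, process=lambda batch: None)
```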
@@ -1327,17 +1760,27 @@ class DatasetView: Raises: ImportError: If pytorch is not installed + + Examples: ```python from torch.utils.data import DataLoader - ds = deeplake.open("path/to/dataset") - dataloader = DataLoader(ds.pytorch(), batch_size=60, - shuffle=True, num_workers=10) - for i_batch, sample_batched in enumerate(dataloader): - process_batch(sample_batched) + dl = DataLoader(ds.pytorch(), batch_size=60, + shuffle=True, num_workers=8) + for i_batch, sample_batched in enumerate(dl): + process_batch(sample_batched) ``` - """ ... @@ -1349,8 +1792,8 @@ class DatasetView: batch_size: Number of rows in each batch drop_last: Whether to drop the final batch if it is incomplete - Examples: - ```python + Examples: + ```python ds = deeplake.open("al://my_org/dataset") batches = ds.batches(batch_size=2000, drop_last=True) for batch in batches: @@ -1479,6 +1922,15 @@ class Dataset(DatasetView): - `tuple`: A tuple of indices specifying the rows to return. Returns a [deeplake.RowRange][] - `str`: A string specifying column to return all values from. Returns a [deeplake.Column][] + + Examples: ```python row = ds[318] @@ -1493,7 +1945,6 @@ class Dataset(DatasetView): column_data = ds["id"] ``` - """ ... @@ -1507,7 +1958,6 @@ class Dataset(DatasetView): # process row pass ``` - """ ... @@ -1550,7 +2000,7 @@ class Dataset(DatasetView): ```python ds.add_column("labels", deeplake.types.Int32) - ds.add_column("labels", "int32") + ds.add_column("categories", "int32") ds.add_column("name", deeplake.types.Text()) @@ -1558,7 +2008,7 @@ class Dataset(DatasetView): ds.add_column("images", deeplake.types.Image(dtype=deeplake.types.UInt8(), sample_compression="jpeg")) - ds.add_column("embedding", deeplake.types.Embedding(dtype=deeplake.types.Float32(), dimensions=768)) + ds.add_column("embedding", deeplake.types.Embedding(size=768)) ``` Raises: @@ -1572,6 +2022,14 @@ class Dataset(DatasetView): Args: name: The name of the column to remove + + Examples: ```python ds.remove_column("name") @@ -1589,6 +2047,14 @@ class Dataset(DatasetView): name: The name of the column to rename new_name: The new name to set to column + + Examples: ```python ds.rename_column("old_name", "new_name") @@ -1620,21 +2086,37 @@ class Dataset(DatasetView): Args: data: The data to insert into the dataset. 
+ + Examples: ```python ds.append({"name": ["Alice", "Bob"], "age": [25, 30]}) ds.append([{"name": "Alice", "age": 25}, {"name": "Bob", "age": 30}]) + ``` - ds.append({ + ```python + ds2.append({ "embedding": np.random.rand(4, 768), "text": ["Hello World"] * 4}) - ds.append([{"embedding": np.random.rand(768), "text": "Hello World"}] * 4) + ds2.append([{"embedding": np.random.rand(768), "text": "Hello World"}] * 4) ``` ```python - ds.append(deeplake.from_parquet("./file.parquet")) + ds2.append(deeplake.from_parquet("./file.parquet")) ``` Raises: @@ -1662,12 +2144,9 @@ class Dataset(DatasetView): Examples: ```python ds.commit() - ``` - ```python ds.commit("Added data from updated documents") ``` - """ def commit_async(self, message: str | None = None) -> FutureVoid: @@ -1682,21 +2161,12 @@ class Dataset(DatasetView): Examples: ```python ds.commit_async().wait() - ``` - ```python ds.commit_async("Added data from updated documents").wait() - ``` - ```python - await ds.commit_async() - ``` - - ```python - await ds.commit_async("Added data from updated documents") - ``` + async def do_commit(): + await ds.commit_async() - ```python future = ds.commit_async() # then you can check if the future is completed using future.is_completed() ``` """ @@ -1807,7 +2277,6 @@ class ReadOnlyDataset(DatasetView): # process row pass ``` - """ ... @@ -1953,6 +2422,12 @@ class InvalidPolygonShapeError(Exception): class InvalidLinkDataError(Exception): pass +class InvalidCredsKeyAssignmentError(Exception): + pass + +class CredsKeyAlreadyAssignedError(Exception): + pass + class GcsStorageProviderFailed(Exception): pass @@ -2055,12 +2530,6 @@ class UnsupportedChunkCompression(Exception): class InvalidImageCompression(Exception): pass -class InvalidCredsKeyAssignmentError(Exception): - pass - -class CredsKeyAlreadyAssignedError(Exception): - pass - class InvalidSegmentMaskCompression(Exception): pass @@ -2206,11 +2675,21 @@ def create( token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated. schema (dict): The initial schema to use for the dataset. See `deeplake.schema` such as [deeplake.schemas.TextEmbeddings][] for common starting schemas. + + Examples: ```python - import deeplake - from deeplake import types - # Create a dataset in your local filesystem: ds = deeplake.create("directory_path") ds.add_column("id", types.Int32()) @@ -2219,42 +2698,23 @@ def create( ds.commit() ds.summary() ``` - Output: - ``` - Dataset length: 0 - Columns: - id : int32 - url : text - embedding: embedding(768) - ``` ```python # Create dataset in your app.activeloop.ai organization: ds = deeplake.create("al://organization_id/dataset_name") - ``` - ```python # Create a dataset stored in your cloud using specified credentials: ds = deeplake.create("s3://mybucket/my_dataset", - creds = {"aws_access_key_id": ..., ...}) - ``` + creds = {"aws_access_key_id": id, "aws_secret_access_key": key}) - ```python # Create dataset stored in your cloud using app.activeloop.ai managed credentials. ds = deeplake.create("s3://mybucket/my_dataset", creds = {"creds_key": "managed_creds_key"}, org_id = "my_org_id") - ``` - ```python - # Create dataset stored in your cloud using app.activeloop.ai managed credentials. 
ds = deeplake.create("azure://bucket/path/to/dataset") - ``` - ```python ds = deeplake.create("gcs://bucket/path/to/dataset") - ``` - ```python ds = deeplake.create("mem://in-memory") ``` @@ -2275,54 +2735,50 @@ def create_async( To open an existing dataset, use [deeplake.open_async][]. + + Examples: ```python - import deeplake - from deeplake import types - - # Asynchronously create a dataset in your local filesystem: - ds = await deeplake.create_async("directory_path") - await ds.add_column("id", types.Int32()) - await ds.add_column("url", types.Text()) - await ds.add_column("embedding", types.Embedding(768)) - await ds.commit() - await ds.summary() # Example of usage in an async context - ``` + async def create_dataset(): + # Asynchronously create a dataset in your local filesystem: + ds = await deeplake.create_async("directory_path") + await ds.add_column("id", types.Int32()) + await ds.add_column("url", types.Text()) + await ds.add_column("embedding", types.Embedding(768)) + await ds.commit() + await ds.summary() # Example of usage in an async context - ```python - # Alternatively, create a dataset using .result(). - future_ds = deeplake.create_async("directory_path") - ds = future_ds.result() # Blocks until the dataset is created - ``` + # Alternatively, create a dataset using .result(). + future_ds = deeplake.create_async("directory_path") + ds = future_ds.result() # Blocks until the dataset is created - ```python - # Create a dataset in your app.activeloop.ai organization: - ds = await deeplake.create_async("al://organization_id/dataset_name") - ``` + # Create a dataset in your app.activeloop.ai organization: + ds = await deeplake.create_async("al://organization_id/dataset_name") - ```python - # Create a dataset stored in your cloud using specified credentials: - ds = await deeplake.create_async("s3://mybucket/my_dataset", - creds={"aws_access_key_id": ..., ...}) - ``` + # Create a dataset stored in your cloud using specified credentials: + ds = await deeplake.create_async("s3://mybucket/my_dataset", + creds={"aws_access_key_id": id, "aws_secret_access_key": key}) - ```python - # Create dataset stored in your cloud using app.activeloop.ai managed credentials. - ds = await deeplake.create_async("s3://mybucket/my_dataset", - creds={"creds_key": "managed_creds_key"}, org_id="my_org_id") - ``` + # Create dataset stored in your cloud using app.activeloop.ai managed credentials. + ds = await deeplake.create_async("s3://mybucket/my_dataset", + creds={"creds_key": "managed_creds_key"}, org_id="my_org_id") - ```python - # Create dataset stored in your cloud using app.activeloop.ai managed credentials. - ds = await deeplake.create_async("azure://bucket/path/to/dataset") - ``` + ds = await deeplake.create_async("azure://bucket/path/to/dataset") - ```python - ds = await deeplake.create_async("gcs://bucket/path/to/dataset") - ``` + ds = await deeplake.create_async("gcs://bucket/path/to/dataset") - ```python - ds = await deeplake.create_async("mem://in-memory") + ds = await deeplake.create_async("mem://in-memory") ``` Raises: @@ -2348,11 +2804,18 @@ def copy( dst_creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the destination dataset at the path. token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated. 
+ + Examples: ```python deeplake.copy("al://organization_id/source_dataset", "al://organization_id/destination_dataset") ``` - """ def delete( @@ -2388,7 +2851,7 @@ def open( See [deeplake.open_read_only][] for opening the dataset in read only mode - To create a new dataset, see [deeplake.open][] + To create a new dataset, see [deeplake.create][] Args: url: The URL of the dataset. URLs can be specified using the following protocols: @@ -2410,33 +2873,32 @@ def open( - If nothing is given is, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated. + + Examples: ```python # Load dataset managed by Deep Lake. ds = deeplake.open("al://organization_id/dataset_name") - ``` - ```python # Load dataset stored in your cloud using your own credentials. ds = deeplake.open("s3://bucket/my_dataset", - creds = {"aws_access_key_id": ..., ...}) - ``` + creds = {"aws_access_key_id": id, "aws_secret_access_key": key}) - ```python # Load dataset stored in your cloud using Deep Lake managed credentials. ds = deeplake.open("s3://bucket/my_dataset", - ...creds = {"creds_key": "managed_creds_key"}, org_id = "my_org_id") - ``` + creds = {"creds_key": "managed_creds_key"}, org_id = "my_org_id") - ```python ds = deeplake.open("s3://bucket/path/to/dataset") - ``` - ```python ds = deeplake.open("azure://bucket/path/to/dataset") - ``` - ```python ds = deeplake.open("gcs://bucket/path/to/dataset") ``` """ @@ -2451,38 +2913,27 @@ def open_async( Examples: ```python - # Asynchronously load dataset managed by Deep Lake using await. - ds = await deeplake.open_async("al://organization_id/dataset_name") - ``` + async def async_open(): + # Asynchronously load dataset managed by Deep Lake using await. + ds = await deeplake.open_async("al://organization_id/dataset_name") - ```python - # Asynchronously load dataset stored in your cloud using your own credentials. - ds = await deeplake.open_async("s3://bucket/my_dataset", - creds={"aws_access_key_id": ..., ...}) - ``` + # Asynchronously load dataset stored in your cloud using your own credentials. + ds = await deeplake.open_async("s3://bucket/my_dataset", + creds={"aws_access_key_id": id, "aws_secret_access_key": key}) - ```python - # Asynchronously load dataset stored in your cloud using Deep Lake managed credentials. - ds = await deeplake.open_async("s3://bucket/my_dataset", - creds={"creds_key": "managed_creds_key"}, org_id="my_org_id") - ``` + # Asynchronously load dataset stored in your cloud using Deep Lake managed credentials. + ds = await deeplake.open_async("s3://bucket/my_dataset", + creds={"creds_key": "managed_creds_key"}, org_id="my_org_id") - ```python - ds = await deeplake.open_async("s3://bucket/path/to/dataset") - ``` + ds = await deeplake.open_async("s3://bucket/path/to/dataset") - ```python - ds = await deeplake.open_async("azure://bucket/path/to/dataset") - ``` + ds = await deeplake.open_async("azure://bucket/path/to/dataset") - ```python - ds = await deeplake.open_async("gcs://bucket/path/to/dataset") - ``` + ds = await deeplake.open_async("gcs://bucket/path/to/dataset") - ```python - # Alternatively, load the dataset using .result(). - future_ds = deeplake.open_async("al://organization_id/dataset_name") - ds = future_ds.result() # Blocks until the dataset is loaded + # Alternatively, load the dataset using .result(). 
+ future_ds = deeplake.open_async("al://organization_id/dataset_name") + ds = future_ds.result() # Blocks until the dataset is loaded ``` """ @@ -2509,12 +2960,18 @@ def like( - If nothing is given is, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated. + + Examples: ```python ds = deeplake.like(src="az://bucket/existing/to/dataset", dest="s3://bucket/new/dataset") ``` - """ def connect( @@ -2538,33 +2995,30 @@ def connect( creds_key (str, optional): The creds_key of the managed credentials that will be used to access the source path. If not set, use the organization's default credentials. token (str, optional): Activeloop token used to fetch the managed credentials. + + Examples: ```python ds = deeplake.connect("s3://bucket/path/to/dataset", "al://my_org/dataset") - ``` - ```python ds = deeplake.connect("s3://bucket/path/to/dataset", "al://my_org/dataset", creds_key="my_key") - ``` - ```python # Connect the dataset as al://my_org/dataset ds = deeplake.connect("s3://bucket/path/to/dataset", org_id="my_org") - ``` - ```python ds = deeplake.connect("az://bucket/path/to/dataset", "al://my_org/dataset", creds_key="my_key") - ``` - ```python ds = deeplake.connect("gcs://bucket/path/to/dataset", "al://my_org/dataset", creds_key="my_key") - ``` - """ def disconnect(url: str, token: str | None = None) -> None: @@ -2584,7 +3038,6 @@ def disconnect(url: str, token: str | None = None) -> None: ```python deeplake.disconnect("al://my_org/dataset_name") ``` - """ def open_read_only( @@ -2618,39 +3071,26 @@ def open_read_only( token (str, optional): Activeloop token to authenticate user. Examples: - ```python + ds = deeplake.open_read_only("directory_path") ds.summary() - ``` Example Output: - ``` Dataset length: 5 Columns: id : int32 url : text embedding: embedding(768) - ``` - ```python ds = deeplake.open_read_only("file:///path/to/dataset") - ``` - ```python ds = deeplake.open_read_only("s3://bucket/path/to/dataset") - ``` - ```python ds = deeplake.open_read_only("azure://bucket/path/to/dataset") - ``` - ```python ds = deeplake.open_read_only("gcs://bucket/path/to/dataset") - ``` - ```python ds = deeplake.open_read_only("mem://in-memory") - ``` """ def open_read_only_async( @@ -2662,36 +3102,69 @@ def open_read_only_async( See [deeplake.open_async][] for opening datasets for modification and [deeplake.open_read_only][] for sync open. Examples: - ```python + # Asynchronously open a dataset in read-only mode: ds = await deeplake.open_read_only_async("directory_path") - ``` - ```python # Alternatively, open the dataset using .result(). 
future_ds = deeplake.open_read_only_async("directory_path") ds = future_ds.result() # Blocks until the dataset is loaded - ``` - ```python ds = await deeplake.open_read_only_async("file:///path/to/dataset") - ``` - ```python ds = await deeplake.open_read_only_async("s3://bucket/path/to/dataset") - ``` - ```python ds = await deeplake.open_read_only_async("azure://bucket/path/to/dataset") - ``` - ```python ds = await deeplake.open_read_only_async("gcs://bucket/path/to/dataset") - ``` - ```python ds = await deeplake.open_read_only_async("mem://in-memory") + """ + +def convert( + src: str, + dst: str, + dst_creds: dict[str, str] | None = None, + token: str | None = None +) -> None: + """ + Converts a Deep Lake v3 dataset to the new v4 format while preserving data and metadata. + Optimized for ML workloads with efficient handling of large datasets and linked data. + + Args: + src: URL of the source v3 dataset to convert + dst: Destination URL for the new v4 dataset. Supports: + - `file://path` local storage + - `s3://bucket/path` S3 storage + - `gs://bucket/path` Google Cloud storage + - `azure://bucket/path` Azure storage + dst_creds: Optional credentials for accessing the destination storage. + Supports cloud provider credentials like access keys + token: Optional Activeloop authentication token + + <-- test-context + ```python + import deeplake + deeplake.convert = lambda src, dst, dst_creds = None, token = None: None + ``` + --> + + Examples: + ```python + # Convert local dataset + deeplake.convert("old_dataset/", "new_dataset/") + + # Convert cloud dataset with credentials + deeplake.convert( + "s3://old-bucket/dataset", + "s3://new-bucket/dataset", + dst_creds={"aws_access_key_id": "key", + "aws_secret_access_key": "secret"} + ) ``` + + Notes: + - You can open v3 dataset without converting it to v4 using `deeplake.query('SELECT * FROM "old_dataset/"')` """ def from_parquet(url: str) -> ReadOnlyDataset: @@ -2702,6 +3175,4 @@ def from_parquet(url: str) -> ReadOnlyDataset: url: The URL of the Parquet dataset. If no protocol is specified, it assumes `file://` """ -def __child_atfork() -> None: ... -def __parent_atfork() -> None: ... def __prepare_atfork() -> None: ... diff --git a/python/deeplake/ingestion/__init__.py b/python/deeplake/ingestion/__init__.py new file mode 100644 index 0000000000..698af773c9 --- /dev/null +++ b/python/deeplake/ingestion/__init__.py @@ -0,0 +1 @@ +from deeplake.ingestion.coco.ingest_coco import ingest_coco \ No newline at end of file diff --git a/python/deeplake/ingestion/coco/__init__.py b/python/deeplake/ingestion/coco/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/deeplake/ingestion/coco/exceptions.py b/python/deeplake/ingestion/coco/exceptions.py new file mode 100644 index 0000000000..4703dc9a4f --- /dev/null +++ b/python/deeplake/ingestion/coco/exceptions.py @@ -0,0 +1,11 @@ +class CocoAnnotationMissingError(Exception): + def __init__(self, keys): + super().__init__( + ( + "COCO dataset ingestion expects to have `instances`, `keypoints` and `stuff`. " + "{} {} missing." 
).format(
+                f"Key {keys[0]}" if len(keys) == 1 else f"Keys {', '.join(keys)}",
+                "is" if len(keys) == 1 else "are",
+            )
+        )
diff --git a/python/deeplake/ingestion/coco/ingest_coco.py b/python/deeplake/ingestion/coco/ingest_coco.py
new file mode 100644
index 0000000000..f3fe86db84
--- /dev/null
+++ b/python/deeplake/ingestion/coco/ingest_coco.py
@@ -0,0 +1,366 @@
+from typing import Union, Optional, List, Dict
+import pathlib
+from deeplake.ingestion.coco.exceptions import CocoAnnotationMissingError
+import deeplake as dp
+import numpy as np
+from tqdm import tqdm
+import os
+
+COCO_REQUIRED_KEYS = ["instances", "keypoints", "stuff"]
+MASKS_NOTE = "All segmentation polygons and RLEs were converted to stacked binary masks"
+
+
+def convert_pathlib_to_string_if_needed(path: Union[str, pathlib.Path]) -> str:
+    if isinstance(path, pathlib.Path):
+        path = str(path)
+    return path
+
+
+def verify_coco_annotation_dict(
+    annotation_files: Dict[str, Union[str, pathlib.Path]] = {}
+):
+    if all(key in annotation_files for key in COCO_REQUIRED_KEYS):
+        return {
+            key: convert_pathlib_to_string_if_needed(value)
+            for key, value in annotation_files.items()
+        }
+    else:
+        raise CocoAnnotationMissingError(
+            list(set(COCO_REQUIRED_KEYS) - annotation_files.keys())
+        )
+
+
+class COCOStructuredDataset:
+    def __init__(
+        self,
+        dataset: dp.Dataset = None,
+        images_directory: Union[str, pathlib.Path] = None,
+        annotation_files: Dict[str, Union[str, pathlib.Path]] = {},
+    ):
+        from pycocotools.coco import COCO
+
+        self.dataset = dataset
+        self.images_directory = images_directory
+        self.annotation_files = annotation_files
+
+        self.coco = COCO(self.annotation_files["instances"])
+        self.coco_kp = COCO(self.annotation_files["keypoints"])
+        self.coco_stuff = COCO(self.annotation_files["stuff"])
+
+        self.category_info = self.coco.loadCats(self.coco.getCatIds())
+        self.category_info_kp = self.coco_kp.loadCats(self.coco_kp.getCatIds())
+        self.category_info_stuff = self.coco_stuff.loadCats(self.coco_stuff.getCatIds())
+        self.img_ids = sorted(self.coco.getImgIds())  # Image ids for uploading
+
+        self.cat_names = [category["name"] for category in self.category_info]
+        self.super_cat_names = list(
+            set([category["supercategory"] for category in self.category_info])
+        )
+        self.cat_names_kp = [category["name"] for category in self.category_info_kp]
+        self.super_cat_names_kp = list(
+            set([category["supercategory"] for category in self.category_info_kp])
+        )
+        self.cat_names_stuff = [
+            category["name"] for category in self.category_info_stuff
+        ]
+        self.super_cat_names_stuff = list(
+            set([category["supercategory"] for category in self.category_info_stuff])
+        )
+
+    def get_kp_group_data(self, height, width, anns_kp):
+        # Iterate through keypoint annotations and parse each
+        categories_kp = np.zeros((len(anns_kp)))
+        supercats_kp = np.zeros((len(anns_kp)))
+        masks_kp = np.zeros((height, width, len(anns_kp)))
+        boxes_kp = np.zeros((len(anns_kp), 4))
+        keypoints_kp = np.zeros((51, len(anns_kp)))
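+
+        # COCO person keypoints are flattened (x, y, visibility) triplets:
+        # 17 joints x 3 values = 51 entries per annotation, hence the fixed
+        # first dimension above.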
+        for j, ann_kp in enumerate(anns_kp):
+            categories_kp[j] = self.cat_names_kp.index(
+                [
+                    self.category_info_kp[i]["name"]
+                    for i in range(len(self.category_info_kp))
+                    if self.category_info_kp[i]["id"] == ann_kp["category_id"]
+                ][0]
+            )
+            supercats_kp[j] = self.super_cat_names_kp.index(
+                [
+                    self.category_info_kp[i]["supercategory"]
+                    for i in range(len(self.category_info_kp))
+                    if self.category_info_kp[i]["id"] == ann_kp["category_id"]
+                ][0]
+            )
+            mask_kp = self.coco.annToMask(ann_kp)  # Convert annotation to mask
+            masks_kp[:, :, j] = mask_kp
+            boxes_kp[j, :] = ann_kp["bbox"]
+            keypoints_kp[:, j] = np.array(ann_kp["keypoints"])
+
+        return categories_kp, supercats_kp, masks_kp, boxes_kp, keypoints_kp
+
+    def get_stuff_group_data(self, height, width, anns_stuff):
+        # Iterate through stuff annotations and parse each
+        masks_stuff = np.zeros((height, width, len(anns_stuff)))
+        boxes_stuff = np.zeros((len(anns_stuff), 4))
+        categories_stuff = np.zeros((len(anns_stuff)))
+        areas_stuff = np.zeros((len(anns_stuff)))
+        iscrowds_stuff = np.zeros((len(anns_stuff)))
+        supercats_stuff = np.zeros((len(anns_stuff)))
+
+        for k, ann_stuff in enumerate(anns_stuff):
+            mask_stuff = self.coco.annToMask(ann_stuff)  # Convert annotation to mask
+            masks_stuff[:, :, k] = mask_stuff
+            boxes_stuff[k, :] = ann_stuff["bbox"]
+
+            # Brute-force lookup: make no assumptions about how category ids
+            # relate to the order of the category lists.
+            categories_stuff[k] = self.cat_names_stuff.index(
+                [
+                    self.category_info_stuff[i]["name"]
+                    for i in range(len(self.category_info_stuff))
+                    if self.category_info_stuff[i]["id"] == ann_stuff["category_id"]
+                ][0]
+            )
+            supercats_stuff[k] = self.super_cat_names_stuff.index(
+                [
+                    self.category_info_stuff[i]["supercategory"]
+                    for i in range(len(self.category_info_stuff))
+                    if self.category_info_stuff[i]["id"] == ann_stuff["category_id"]
+                ][0]
+            )
+
+            areas_stuff[k] = ann_stuff["area"]
+            iscrowds_stuff[k] = ann_stuff["iscrowd"]
+
+            if "segmentation" not in ann_stuff:
+                print(
+                    "----No segmentation found for image id {} "
+                    "({} stuff annotations); continuing.----".format(
+                        ann_stuff["image_id"], len(anns_stuff)
+                    )
+                )
+
+        return (
+            masks_stuff,
+            boxes_stuff,
+            categories_stuff,
+            areas_stuff,
+            iscrowds_stuff,
+            supercats_stuff,
+        )
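+
+    # Column layout: instance annotations live in the top-level columns,
+    # keypoint annotations under the pose/ group, and stuff segmentation
+    # under the stuff/ group.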
+    def create_structure(self):
+        self.dataset.add_column(
+            "images", dp.types.Image(dp.types.UInt8(), sample_compression="jpg")
+        )
+        self.dataset.add_column("masks", dp.types.BinaryMask(sample_compression="lz4"))
+        self.dataset.add_column(
+            "boxes", dp.types.BoundingBox(dp.types.Float32(), "ltrb", "pixel")
+        )
+        self.dataset.add_column(
+            "categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+        )
+        self.dataset["categories"].metadata["class_names"] = self.cat_names
+        self.dataset.add_column(
+            "super_categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+        )
+        self.dataset["super_categories"].metadata["class_names"] = self.super_cat_names
+        self.dataset.add_column("areas", dp.types.Array("uint32", 1))
+        self.dataset.add_column("iscrowds", dp.types.Array("bool", 1))
+        self.dataset.add_column("images_meta", dp.types.Dict())
+
+        # Pose
+        self.dataset.add_column(
+            "pose/categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+        )
+        self.dataset["pose/categories"].metadata["class_names"] = self.cat_names_kp
+        self.dataset.add_column(
+            "pose/super_categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+        )
+        self.dataset["pose/super_categories"].metadata[
+            "class_names"
+        ] = self.super_cat_names_kp
+        self.dataset.add_column(
+            "pose/boxes", dp.types.BoundingBox(dp.types.Float32(), "LTWH", "pixel")
+        )
+        self.dataset.add_column(
+            "pose/keypoints", dp.types.Array("int32", 2)
+        )  # htype="keypoints_coco"
+        self.dataset.add_column(
+            "pose/masks", dp.types.BinaryMask(sample_compression="lz4")
+        )
+
+        # Stuff
+        self.dataset.add_column(
+            "stuff/masks", dp.types.BinaryMask(sample_compression="lz4")
+        )
+        self.dataset.add_column(
+            "stuff/boxes", dp.types.BoundingBox(dp.types.Float32(), "LTWH", "pixel")
+        )
+        self.dataset.add_column(
+            "stuff/categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+        )
+        self.dataset["stuff/categories"].metadata["class_names"] = self.cat_names_stuff
+        self.dataset.add_column(
+            "stuff/super_categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+        )
+        self.dataset["stuff/super_categories"].metadata[
+            "class_names"
+        ] = self.super_cat_names_stuff
+        self.dataset.add_column("stuff/areas", dp.types.Array("uint32", 1))
+        self.dataset.add_column("stuff/iscrowds", dp.types.Array("bool", 1))
+
+        # Update column metadata
+        self.dataset["categories"].metadata["category_info"] = self.category_info
+        self.dataset["categories"].metadata["notes"] = (
+            "Numeric labels for categories represent the position of the class in "
+            "the ds['categories'].metadata['class_names'] list, and not the COCO "
+            "category id."
+        )
+        self.dataset["super_categories"].metadata["category_info"] = self.category_info
+        self.dataset["super_categories"].metadata["notes"] = (
+            "Numeric labels for categories represent the position of the class in "
+            "the ds['super_categories'].metadata['class_names'] list, and not the "
+            "COCO category id."
+        )
+
+        self.dataset["masks"].metadata["notes"] = MASKS_NOTE
+        self.dataset["pose/masks"].metadata["category_info"] = self.category_info_kp
+        self.dataset["pose/masks"].metadata["notes"] = MASKS_NOTE
+        self.dataset["pose/keypoints"].metadata["keypoints"] = [
+            category["keypoints"] for category in self.category_info_kp
+        ][0]
+        self.dataset["pose/keypoints"].metadata["connections"] = [
+            category["skeleton"] for category in self.category_info_kp
+        ][0]
+
+        self.dataset["stuff/masks"].metadata["category_info"] = self.category_info_stuff
+        self.dataset["stuff/masks"].metadata["notes"] = MASKS_NOTE
+
+    def ingest_columns(self):
+        for img_id in tqdm(self.img_ids):
+            ann_ids = self.coco.getAnnIds(img_id)
+            ann_ids_kp = self.coco_kp.getAnnIds(img_id)
+            ann_ids_stuff = self.coco_stuff.getAnnIds(img_id)
+            anns = self.coco.loadAnns(ann_ids)
+            anns_kp = self.coco_kp.loadAnns(ann_ids_kp)
+            anns_stuff = self.coco_stuff.loadAnns(ann_ids_stuff)
+
+            img_coco = self.coco.loadImgs(img_id)[0]
+            img_path = os.path.join(self.images_directory, img_coco["file_name"])
+            with open(img_path, "rb") as file:
+                image_bytes = file.read()
+            (height, width) = (img_coco["height"], img_coco["width"])
+            masks = np.zeros((height, width, len(anns)))
+            boxes = np.zeros((len(anns), 4))
+            categories = np.zeros((len(anns)))
+            areas = np.zeros((len(anns)))
+            iscrowds = np.zeros((len(anns)))
+            supercats = np.zeros((len(anns)))
+
+            for i, ann in enumerate(anns):
+                mask = self.coco.annToMask(ann)
+                masks[:, :, i] = mask
+                boxes[i, :] = ann["bbox"]
+
+                categories[i] = self.cat_names.index(
+                    [
+                        self.category_info[j]["name"]
+                        for j in range(len(self.category_info))
+                        if self.category_info[j]["id"] == ann["category_id"]
+                    ][0]
+                )
+                supercats[i] = self.super_cat_names.index(
+                    [
+                        self.category_info[j]["supercategory"]
+                        for j in range(len(self.category_info))
+                        if self.category_info[j]["id"] == ann["category_id"]
+                    ][0]
+                )
+
+                areas[i] = ann["area"]
+                iscrowds[i] = ann["iscrowd"]
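+                # A missing segmentation entry is unexpected for COCO
+                # instances; log it but keep ingesting.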
+                if "segmentation" not in ann:
+                    print(
+                        "----No segmentation found for image id {} "
+                        "({} annotations); continuing.----".format(img_id, len(anns))
+                    )
+
+            (categories_kp, supercats_kp, masks_kp, boxes_kp, keypoints_kp) = (
+                self.get_kp_group_data(height, width, anns_kp)
+            )
+
+            (
+                masks_stuff,
+                boxes_stuff,
+                categories_stuff,
+                areas_stuff,
+                iscrowds_stuff,
+                supercats_stuff,
+            ) = self.get_stuff_group_data(height, width, anns_stuff)
+
+            in_dict = {
+                "images": [image_bytes],
+                "images_meta": [img_coco],
+                "masks": [masks.astype("bool")],
+                "boxes": [boxes.astype("float32")],
+                "categories": [categories.astype("uint32")],
+                "super_categories": [supercats.astype("uint32")],
+                "areas": [areas.astype("uint32")],
+                "iscrowds": [iscrowds.astype("bool")],
+                "pose/categories": [categories_kp.astype("uint32")],
+                "pose/super_categories": [supercats_kp.astype("uint32")],
+                "pose/boxes": [boxes_kp.astype("float32")],
+                "pose/masks": [masks_kp.astype("bool")],
+                "pose/keypoints": [keypoints_kp.astype("int32")],
+                "stuff/masks": [masks_stuff.astype("bool")],
+                "stuff/boxes": [boxes_stuff.astype("float32")],
+                "stuff/categories": [categories_stuff.astype("uint32")],
+                "stuff/super_categories": [supercats_stuff.astype("uint32")],
+                "stuff/areas": [areas_stuff.astype("uint32")],
+                "stuff/iscrowds": [iscrowds_stuff.astype("bool")],
+            }
+            self.dataset.append(in_dict)
+        self.dataset.commit("Finished ingestion")
+
+    def structure(self):
+        self.create_structure()
+        self.ingest_columns()
+
+
+def ingest_coco(
+    images_directory: Union[str, pathlib.Path],
+    annotation_files: Dict[str, Union[str, pathlib.Path]],
+    dest: Union[str, pathlib.Path],
+    dest_creds: Optional[Dict[str, str]] = None,
+):
+    """Ingest images and annotations in COCO format into a Deep Lake dataset. The source data can be stored locally or in the cloud.
+
+    Args:
+        images_directory (str, pathlib.Path): The path to the directory containing images.
+        annotation_files (Dict[str, Union[str, pathlib.Path]]): Dictionary mapping an annotation key to the path of a JSON annotation file in COCO format.
+            - The required keys are `instances`, `keypoints` and `stuff`.
+        dest (str, pathlib.Path): The full path to the dataset. Can be:
+            - a Deep Lake cloud path of the form ``al://org_id/datasetname``. To write to Deep Lake cloud datasets, ensure that you are authenticated to Deep Lake (for example via the ACTIVELOOP_TOKEN environment variable).
+            - an s3 path of the form ``s3://bucketname/path/to/dataset``. Credentials are required in either the environment or passed to the creds argument.
+            - a local file system path of the form ``./path/to/dataset`` or ``~/path/to/dataset`` or ``path/to/dataset``.
+            - a memory path of the form ``mem://path/to/dataset`` which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
+        dest_creds (Optional[Dict[str, str]]): The dictionary containing credentials used to access the destination path of the dataset.
+
+    Returns:
+        Dataset: The Dataset created from the images and COCO annotations.
+
+    Raises:
+        CocoAnnotationMissingError: If one or more of the required annotation keys are missing.
+ """ + + dest = convert_pathlib_to_string_if_needed(dest) + images_directory = convert_pathlib_to_string_if_needed(images_directory) + + annotation_files = verify_coco_annotation_dict(annotation_files) + + dist_ds = dp.create(dest, dict(dest_creds) if dest_creds is not None else {}) + + unstructured = COCOStructuredDataset( + dataset=dist_ds, + images_directory=images_directory, + annotation_files=annotation_files, + ) + + unstructured.structure() + + return dist_ds diff --git a/python/deeplake/integrations/__init__.py b/python/deeplake/integrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/deeplake/integrations/constants.py b/python/deeplake/integrations/constants.py new file mode 100644 index 0000000000..32b2f7dc15 --- /dev/null +++ b/python/deeplake/integrations/constants.py @@ -0,0 +1,5 @@ +# constant showing the GPU memory cleanup interval +TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING = 10 * 60 + +# DEEPLAKE_AUTH_TOKEN holds the value of the ACTIVELOOP_TOKEN environment variable +DEEPLAKE_AUTH_TOKEN = "ACTIVELOOP_TOKEN" diff --git a/python/deeplake/integrations/mm/__init__.py b/python/deeplake/integrations/mm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/deeplake/integrations/mm/exceptions.py b/python/deeplake/integrations/mm/exceptions.py new file mode 100644 index 0000000000..dffa82406c --- /dev/null +++ b/python/deeplake/integrations/mm/exceptions.py @@ -0,0 +1,24 @@ +class EmptyTokenException(Exception): + def __init__(self, message="The authentication token is empty."): + super().__init__(message) + + +class ValidationDatasetMissingError(Exception): + def __init__(self): + msg = ( + "Validation dataset is not specified even though validate = True. " + "Please set validate = False or specify a validation dataset." + ) + super().__init__(msg) + + +class InvalidImageError(Exception): + def __init__(self, column_name, ex): + msg = f"Error on {column_name} data getting: {str(ex)}" + super().__init__(msg) + + +class InvalidSegmentError(Exception): + def __init__(self, column_name, ex): + msg = f"Error on {column_name} data getting: {str(ex)}" + super().__init__(msg) diff --git a/python/deeplake/integrations/mm/get_indexes.py b/python/deeplake/integrations/mm/get_indexes.py new file mode 100644 index 0000000000..74ae89bba7 --- /dev/null +++ b/python/deeplake/integrations/mm/get_indexes.py @@ -0,0 +1,66 @@ +import math +from typing import Optional + + +def get_indexes( + dataset, + rank: Optional[int] = None, + num_replicas: Optional[int] = None, + drop_last: Optional[bool] = None, +): + """ + Generates a slice for a given rank in a distributed setting, dividing + the dataset evenly across multiple replicas. + + Parameters: + dataset (Dataset): The dataset to split across distributed replicas. + rank (Optional[int]): The rank of the current process. If not specified, + the function will use the distributed package to get the current rank. + num_replicas (Optional[int]): Total number of replicas (i.e., processes) involved in distributed training. + If not specified, the function will determine the number based on the world size. + drop_last (Optional[bool]): If True, drop the extra data not evenly divisible among replicas. + This is useful for maintaining equal batch sizes across replicas. + + Returns: + slice: A slice object representing the start and end indices for the current rank's portion of the dataset. 
+ + Raises: + RuntimeError: If the distributed package is not available when `rank` or `num_replicas` are not specified. + ValueError: If the specified `rank` is out of range based on the number of replicas. + + Notes: + This function requires the `torch.distributed` package to determine the number of replicas and + rank when they are not provided. It is useful in distributed data loading to ensure each process + gets a specific subset of the data. + """ + import torch.distributed as dist + + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + if rank >= num_replicas or rank < 0: + raise ValueError( + "Invalid rank {}, rank should be in the interval" + " [0, {}]".format(rank, num_replicas - 1) + ) + + dataset_length = len(dataset) + + if drop_last: + total_size = (dataset_length // num_replicas) * num_replicas + per_process = total_size // num_replicas + else: + per_process = math.ceil(dataset_length / num_replicas) + total_size = per_process * num_replicas + + start_index = rank * per_process + end_index = min(start_index + per_process, total_size) + + end_index = min(end_index, dataset_length) + + return slice(start_index, end_index) diff --git a/python/deeplake/integrations/mm/ipc.py b/python/deeplake/integrations/mm/ipc.py new file mode 100644 index 0000000000..71e418db7f --- /dev/null +++ b/python/deeplake/integrations/mm/ipc.py @@ -0,0 +1,6 @@ +import socketserver + + +def _get_free_port() -> int: + with socketserver.TCPServer(("localhost", 0), None) as s: # type: ignore + return s.server_address[1] diff --git a/python/deeplake/integrations/mm/mm_common.py b/python/deeplake/integrations/mm/mm_common.py new file mode 100644 index 0000000000..dcaa5c639d --- /dev/null +++ b/python/deeplake/integrations/mm/mm_common.py @@ -0,0 +1,220 @@ +import os +import torch +import warnings +import mmcv # type: ignore +import deeplake as dp +from deeplake.types import TypeKind +from deeplake.integrations.mm.warnings import always_warn +from deeplake.integrations.mm.exceptions import EmptyTokenException +from deeplake.integrations.constants import DEEPLAKE_AUTH_TOKEN + + +def ddp_setup(rank: int, world_size: int, port: int): + """ + Args: + rank: Unique identifier of each process + world_size: Total number of processes + port: Port number + """ + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = str(port) + torch.distributed.init_process_group( + backend="nccl", rank=rank, world_size=world_size + ) + + +def force_cudnn_initialization(device_id): + dev = torch.device(f"cuda:{device_id}") + torch.nn.functional.conv2d( + torch.zeros(32, 32, 32, 32, device=dev), torch.zeros(32, 32, 32, 32, device=dev) + ) + + +def load_ds_from_cfg(cfg: mmcv.utils.config.ConfigDict): + creds = cfg.get("deeplake_credentials", {}) + token = creds.get("token", None) + deeplake_commit = cfg.get("deeplake_commit") + deeplake_tag_id = cfg.get("deeplake_tag_id") + deeplake_query = cfg.get("deeplake_query") + token = token or os.environ.get(DEEPLAKE_AUTH_TOKEN) + if token is None: + raise EmptyTokenException() + + try: + ds = dp.open_read_only(cfg.deeplake_path, token=token, creds=creds) + except Exception: + if not deeplake_query: + raise + ds = dp.query(deeplake_query) + + if deeplake_tag_id and deeplake_query: + raise Exception( + "A query and tag_id were specified
simultaneously for a dataset in the config. Please specify either the deeplake_query or the deeplake_tag_id." + ) + + if deeplake_commit: + ds.checkout(deeplake_commit) + + if deeplake_tag_id: + ds = ds.tags(deeplake_tag_id).open() + + if deeplake_query: + ds = ds.query(deeplake_query) + + return ds + + +def get_collect_keys(cfg): + pipeline = cfg.train_pipeline + for transform in pipeline: + if transform["type"] == "Collect": + return transform["keys"] + raise ValueError("collection keys were not specified") + + +def check_persistent_workers(train_persistent_workers, val_persistent_workers): + if train_persistent_workers != val_persistent_workers: + if train_persistent_workers: + always_warn( + "persistent workers for training and evaluation should be identical, " + "otherwise, this could lead to performance issues. " + "Either both of them should be `True` or both of them should be `False`. " + "If you want to use persistent workers, set True for validation" + ) + else: + always_warn( + "persistent workers for training and evaluation should be identical, " + "otherwise, this could lead to performance issues. " + "Either both of them should be `True` or both of them should be `False`. " + "If you want to use persistent workers, set True for training" + ) + + +def find_image_tensor(ds: dp.Dataset, mm_class=None): + images = [ + col.name + for col in ds.schema.columns + if ds.schema[col.name].dtype.is_image + ] + if mm_class is not None: + always_warn( + f"No deeplake column name specified for '{mm_class}' in config. Fetching it using type_kind '{TypeKind.Image}'." + ) + if not images: + always_warn(f"No column found with type_kind='{TypeKind.Image}'") + return None + t = images[0] + if len(images) > 1: + always_warn( + f"Multiple columns with type_kind='{TypeKind.Image}' found. Choosing '{t}'." + ) + print(f"columns {images} kind {TypeKind.Image} mm_class {mm_class} t {t}") + return t + + +def find_smask_tensor(ds: dp.Dataset, mm_class=None): + smasks = [ + col.name + for col in ds.schema.columns + if ds.schema[col.name].dtype.is_segment_mask + ] + if mm_class is not None: + always_warn( + f"No deeplake column name specified for '{mm_class}' in config. Fetching it using type_kind '{TypeKind.SegmentMask}'." + ) + if not smasks: + always_warn(f"No column found with type_kind='{TypeKind.SegmentMask}'") + return None + t = smasks[0] + if len(smasks) > 1: + always_warn( + f"Multiple columns with type_kind='{TypeKind.SegmentMask}' found. Choosing '{t}'." + ) + print(f"columns {smasks} kind {TypeKind.SegmentMask} mm_class {mm_class} t {t}") + return t + + +def find_tensor_with_htype(ds: dp.Dataset, type_kind=TypeKind.Image, mm_class=None): + columns = [col.name for col in ds.schema.columns if col.dtype.kind == type_kind] + if mm_class is not None: + always_warn( + f"No deeplake column name specified for '{mm_class}' in config. Fetching it using type_kind '{type_kind}'." + ) + if not columns: + always_warn(f"No column found with type_kind='{type_kind}'") + return None + t = columns[0] + if len(columns) > 1: + always_warn( + f"Multiple columns with type_kind='{type_kind}' found. Choosing '{t}'."
+ ) + + print(f"columns {columns} kind {type_kind} mm_class {mm_class} t {t}") + return t + + +def check_unsupported_functionalities(cfg): + check_unused_dataset_fields(cfg) + check_unsupported_train_pipeline_fields(cfg, mode="train") + check_unsupported_train_pipeline_fields(cfg, mode="val") + check_dataset_augmentation_formats(cfg) + + +def check_unused_dataset_fields(cfg): + if cfg.get("dataset_type"): + always_warn( + "The deeplake mmdet integration does not use dataset_type to work with the data and compute metrics. All deeplake datasets are in the same deeplake format. To specify a metrics format, you should use deeplake_metrics_format " + ) + + if cfg.get("data_root"): + always_warn( + "The deeplake mmdet integration does not use data_root, this input will be ignored" + ) + + +def check_unsupported_train_pipeline_fields(cfg, mode="train"): + transforms = cfg.data[mode].pipeline + + for transform in transforms: + transform_type = transform.get("type") + + if transform_type == "LoadImageFromFile": + always_warn( + "LoadImageFromFile is going to be skipped because deeplake mmdet integration does not use it" + ) + + if transform_type == "LoadAnnotations": + always_warn( + "LoadAnnotations is going to be skipped because deeplake mmdet integration does not use it" + ) + + if transform_type == "Corrupt": + raise Exception("Corrupt augmentation is not supported yet.") + + elif transform_type == "CopyPaste": # TO DO: @adolkhan resolve this + raise Exception("CopyPaste augmentation is not supported yet.") + + elif transform_type == "CutOut": # TO DO: @adolkhan resolve this + raise Exception("CutOut augmentation is not supported yet.") + + elif transform_type == "Mosaic": # TO DO: @adolkhan resolve this + raise Exception("Mosaic augmentation is not supported yet.") + + +def check_dataset_augmentation_formats(cfg): + if cfg.get("train_dataset"): + always_warn( + "train_dataset is going to be unused. Dataset types like: ConcatDataset, RepeatDataset, ClassBalancedDataset, MultiImageMixDataset are not supported." + ) + + +def get_pipeline(cfg, *, name: str, generic_name: str): + pipeline = cfg.data[name].get("pipeline", None) + if pipeline is None: + warnings.warn( + f"Warning: The '{name}' data pipeline is missing in the configuration. Attempting to locate it in '{generic_name}'."
+ ) + + pipeline = cfg.get(generic_name, []) + + return pipeline diff --git a/python/deeplake/integrations/mm/mm_runners.py b/python/deeplake/integrations/mm/mm_runners.py new file mode 100644 index 0000000000..97b9786f10 --- /dev/null +++ b/python/deeplake/integrations/mm/mm_runners.py @@ -0,0 +1,152 @@ +import mmcv # type: ignore + +import torch +import logging +from mmcv import runner +from torch.utils.data import DataLoader + +import time +import warnings +from typing import List, Tuple, Optional +from deeplake.integrations.constants import TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING + + +def empty_cuda(): + try: + torch.cuda.empty_cache() + except Exception: + pass + return + + +@runner.RUNNERS.register_module() +class DeeplakeIterBasedRunner(runner.IterBasedRunner): + def __init__(self, **kwargs): + self.force_cleanup = kwargs.pop("force_cleanup", True) + super().__init__(**kwargs) + + def run( + self, + data_loaders: List[DataLoader], + workflow: List[Tuple[str, int]], + max_iters: Optional[int] = None, + **kwargs, + ) -> None: + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_iters is not None: + warnings.warn( + "setting max_iters in run is deprecated, " + "please set max_iters in runner_config", + DeprecationWarning, + ) + self._max_iters = max_iters + assert ( + self._max_iters is not None + ), "max_iters must be specified during instantiation" + + work_dir = self.work_dir if self.work_dir is not None else "NONE" + self.logger.info( + "Start running, host: %s, work_dir: %s", + runner.utils.get_host_info(), + work_dir, + ) + self.logger.info( + "Hooks will be executed in the following order:\n%s", self.get_hook_info() + ) + self.logger.info("workflow: %s, max: %d iters", workflow, self._max_iters) + self.call_hook("before_run") + + iter_loaders = [runner.IterLoader(x) for x in data_loaders] + + self.call_hook("before_epoch") + + formatter = logging.Formatter("%(relative)ss") + start_time = time.time() + + while self.iter < self._max_iters: + for i, flow in enumerate(workflow): + self._inner_iter = 0 + mode, iters = flow + if not isinstance(mode, str) or not hasattr(self, mode): + raise ValueError( + 'runner has no method named "{}" to run a workflow'.format(mode) + ) + iter_runner = getattr(self, mode) + for _ in range(iters): + if mode == "train" and self.iter >= self._max_iters: + break + + iter_time = time.time() + + if ( + self.force_cleanup + and iter_time - start_time + > TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING + ): + empty_cuda() + start_time = iter_time + iter_runner(iter_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook("after_epoch") + self.call_hook("after_run") + + +@runner.RUNNERS.register_module() +class DeeplakeEpochBasedRunner(runner.EpochBasedRunner): + def __init__(self, **kwargs): + self.force_cleanup = kwargs.pop("force_cleanup", True) + super().__init__(**kwargs) + + def train(self, data_loader, **kwargs): + start_time = time.time() + self.model.train() + self.mode = "train" + self.data_loader = data_loader + self._max_iters = self._max_epochs * len(self.data_loader) + self.call_hook("before_train_epoch") + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self.data_batch = data_batch + self._inner_iter = i + self.call_hook("before_train_iter") + self.run_iter(data_batch, train_mode=True, **kwargs) + self.call_hook("after_train_iter") + del self.data_batch + 
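# Drop the batch reference before the iteration bookkeeping below so its tensors can be garbage-collected promptly. +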
self._iter += 1 + iter_time = time.time() + if ( + self.force_cleanup + and iter_time - start_time > TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING + ): + empty_cuda() + start_time = iter_time + + self.call_hook("after_train_epoch") + self._epoch += 1 + + @torch.no_grad() + def val(self, data_loader, **kwargs): + start_time = time.time() + self.model.eval() + self.mode = "val" + self.data_loader = data_loader + self.call_hook("before_val_epoch") + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self.data_batch = data_batch + self._inner_iter = i + self.call_hook("before_val_iter") + self.run_iter(data_batch, train_mode=False) + self.call_hook("after_val_iter") + del self.data_batch + iter_time = time.time() + if ( + self.force_cleanup + and iter_time - start_time > TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING + ): + empty_cuda() + start_time = iter_time + self.call_hook("after_val_epoch") diff --git a/python/deeplake/integrations/mm/upcast_array.py b/python/deeplake/integrations/mm/upcast_array.py new file mode 100644 index 0000000000..8f94dcdb32 --- /dev/null +++ b/python/deeplake/integrations/mm/upcast_array.py @@ -0,0 +1,15 @@ +import numpy as np +from typing import Union + + +def upcast_array(arr: Union[np.ndarray, bytes]): + if isinstance(arr, list): + return [upcast_array(a) for a in arr] + if isinstance(arr, np.ndarray): + if arr.dtype == np.uint16: + return arr.astype(np.int32) + if arr.dtype == np.uint32: + return arr.astype(np.int64) + if arr.dtype == np.uint64: + return arr.astype(np.int64) + return arr diff --git a/python/deeplake/integrations/mm/warnings.py b/python/deeplake/integrations/mm/warnings.py new file mode 100644 index 0000000000..fc2193d487 --- /dev/null +++ b/python/deeplake/integrations/mm/warnings.py @@ -0,0 +1,7 @@ +import warnings + + +def always_warn(*args, **kwargs): + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn(*args, **kwargs) diff --git a/python/deeplake/integrations/mm/worker_init_fn.py b/python/deeplake/integrations/mm/worker_init_fn.py new file mode 100644 index 0000000000..43c4282d80 --- /dev/null +++ b/python/deeplake/integrations/mm/worker_init_fn.py @@ -0,0 +1,21 @@ +import numpy as np +import torch +import random + + +def worker_init_fn(worker_id, num_workers, rank, seed): + """Worker init func for dataloader. + + The seed of each worker equals num_workers * rank + worker_id + seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = num_workers * rank + worker_id + seed + np.random.seed(worker_seed) + random.seed(worker_seed) + torch.manual_seed(worker_seed) diff --git a/python/deeplake/integrations/mmdet/__init__.py b/python/deeplake/integrations/mmdet/__init__.py new file mode 100644 index 0000000000..3731638a14 --- /dev/null +++ b/python/deeplake/integrations/mmdet/__init__.py @@ -0,0 +1,2 @@ +from deeplake.integrations.mmdet.mmdet_ import train_detector +from mmdet.models import build_detector # type: ignore diff --git a/python/deeplake/integrations/mmdet/mmdet_.py b/python/deeplake/integrations/mmdet/mmdet_.py new file mode 100644 index 0000000000..32a9e79573 --- /dev/null +++ b/python/deeplake/integrations/mmdet/mmdet_.py @@ -0,0 +1,813 @@ +""" +Deep Lake offers an integration with MMDetection, a popular open-source object detection toolbox based on PyTorch.
+The integration enables users to train models while streaming Deep Lake datasets using the transformation, training, and evaluation tools built by MMDet. + +Learn more about MMDetection `here `_. + +Integration Interface +~~~~~~~~~~~~~~~~~~~~~ +MMDetection works with configs. Deeplake adopted this strategy, and in order to train MMDet models, you need to create/specify your model +and training/validation config. Deep Lake integration's logic is almost the same as MMDetection's with some minor modifications. The integration +with MMDET occurs in the deeplake.integrations.mmdet module. At a high level, Deep Lake is responsible for the pytorch dataloader that streams data +to the training framework, while MMDET is used for the training, transformation, and evaluation logic. Let us take a look at the config with deeplake changes: + +Deeplake integration requires the following parameters to be specified in the configuration file: + +- ``data``: Just like in the MMDetection configuration files, in the data dictionary you can specify everything that you want to be applied to the data during training and validation + - ``train``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the training data + - ``val``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the validation data + - ``pipeline``: List of transformations. This parameter exists for train as well as for val. + + - Example: + + >>> pipeline = [dict(type="Resize", img_scale=[(320, 320), (608, 608)], keep_ratio=True), dict(type="RandomFlip", flip_ratio=0.5), dict(type="PhotoMetricDistortion")] + + - ``deeplake_path``: Path to the deeplake dataset. This parameter exists for train as well as for val. + - ``deeplake_credentials``: Optional parameter. Required only when using private nonlocal datasets. See documentation for `deeplake.open_read_only() <https://docs.deeplake.ai/latest/api/dataset/#deeplake.open_read_only>`_ for details. This parameter exists for train as well as for val. + - ``deeplake_tag_id``: Optional parameter. If specified, the dataset will be checked out to the tag. This parameter exists for train as well as for val. See documentation for `Dataset.commit_id `_ + - ``deeplake_query``: Optional parameter. If specified, the dataset is loaded from the query result when deeplake_path is not specified; if deeplake_path is specified, the query is applied to that dataset. + - ``deeplake_tensors``: Optional parameter. If specified, maps MMDetection tensors to the associated tensors in the dataset. MMDet tensors are: "img", "gt_bboxes", "gt_labels", "gt_masks". This parameter exists for train as well as for val. + - ``"img"``: Stands for image tensor. + - ``"gt_bboxes"``: Stands for bounding box tensor. + - ``"gt_labels"``: Stands for labels tensor. + - ``"gt_masks"``: Stands for masks tensor. + + - ``deeplake_dataloader``: Optional parameter. If specified, represents the parameters of the deeplake dataloader. Deeplake dataloader parameters are: "shuffle", "batch_size", "num_workers". This parameter exists for train as well as for val. + - ``"shuffle"``: If ``True`` shuffles the dataset. + - ``"batch_size"``: Size of batch. If not specified, dataloader will use ``samples_per_gpu``. + - ``"num_workers"``: Number of workers to use. If not specified, dataloader will use ``workers_per_gpu``. + +- ``deeplake_metrics_format``: Optional parameter.
If specified, it represents the format of the deeplake metrics that will be used during evaluation. Defaults to COCO. + Available values are: "COCO", "PascalVOC". If the COCO format is used, you can specify whether you want to evaluate on bbox only or also on masks. + To do that, specify the metric types in the ``metric`` key of the evaluation dict. + +Example: + +>>> deeplake_metrics_format = "COCO" +>>> evaluation = dict(metric=["bbox"], interval=1) + +- ``train_detector``: Function to train the MMDetection model. + + Parameters: + + - ``model``: MMDetection model that is going to be used. + - ``cfg``: mmcv.ConfigDict, Configuration of the model as well as of the datasets and transforms that are going to be used. + - ``ds_train``: Optional parameter. If provided, it will overwrite deeplake_path in train, and this dataset will be passed directly to the dataloader. + - ``ds_val``: Optional parameter. If provided, it will overwrite deeplake_path in val, and this dataset will be passed directly to the dataloader. + - ``ds_train_tensors``: Optional parameter. If provided, it will overwrite deeplake_tensors in train, and this tensor mapping will be passed directly to the dataloader. + - ``ds_val_tensors``: Optional parameter. If provided, it will overwrite deeplake_tensors in val, and this tensor mapping will be passed directly to the dataloader. + - ``distributed``: Optional parameter. If provided, the code will run on all available gpus. + - ``meta``: Optional parameter. Meta data used to build the runner. + - ``timestamp``: Variable used in runner to make .log and .log.json filenames the same. + - ``validate``: Bool, whether validation should be run, defaults to ``True``. + +NOTE: + ``gt_masks`` is an optional parameter; if you want to train a pure detector, it can be excluded. The other mappings are mandatory: + if you don't specify them explicitly, they are going to be searched in the dataset according to tensor htype. It is better to specify them explicitly. + +MMDetection Config Examples +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Below is an example of the deeplake mmdet configuration: + + +>>> _base_ = "../mmdetection/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py" +>>> # use caffe img_norm +>>> img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True) +>>> train_pipeline = [ +... dict(type='LoadImageFromFile'), +... dict(type='LoadAnnotations', with_bbox=True), +... dict( +... type='Expand', +... mean=img_norm_cfg['mean'], +... to_rgb=img_norm_cfg['to_rgb'], +... ratio_range=(1, 2)), +... dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True), +... dict(type='RandomFlip', flip_ratio=0.0), +... dict(type='PhotoMetricDistortion'), +... dict(type='Normalize', **img_norm_cfg), +... dict(type='Pad', size_divisor=32), +... dict(type='DefaultFormatBundle'), +... dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +... ] +>>> test_pipeline = [ +... dict(type='LoadImageFromFile'), +... dict( +... type='MultiScaleFlipAug', +... img_scale=(416, 416), +... flip=False, +... transforms=[ +... dict(type='Resize', keep_ratio=True), +... dict(type='RandomFlip', flip_ratio=0.0), +... dict(type='Normalize', **img_norm_cfg), +... dict(type='Pad', size_divisor=32), +... dict(type='ImageToTensor', keys=['img']), +... dict(type='Collect', keys=['img']) +... ]) +... ] +>>> #--------------------------------------DEEPLAKE INPUTS------------------------------------------------------------# +>>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN" +>>> data = dict( +... # samples_per_gpu=4, # Is used instead of batch_size if deeplake_dataloader is not specified below +...
# workers_per_gpu=8, # Is used instead of num_workers if deeplake_dataloader is not specified below +... train=dict( +... pipeline=train_pipeline, +... # Credentials for authentication. See documentation for deeplake.open() for details +... deeplake_path="al://activeloop/coco-train", +... deeplake_credentials={ +... "token": TOKEN, +... "creds": None, +... }, +... #OPTIONAL - Checkout the specified commit before training +... deeplake_commit="", +... #OPTIONAL - Loads a dataset tag for training based on tag_id +... deeplake_tag_id="", +... # OPTIONAL - {"mmdet_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMDET dictionary keys. +... # If not specified, Deep Lake will auto-infer the mapping, but it might make mistakes if datasets have many tensors +... deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories", "gt_masks": "masks"}, +... # OPTIONAL - Parameters to use for the Deep Lake dataloader. If unspecified, the integration uses +... # the parameters in other parts of the cfg file such as samples_per_gpu, and others. +... deeplake_dataloader = {"shuffle": True, "batch_size": 4, 'num_workers': 8} +... ), +... # Parameters are the same as for train +... val=dict( +... pipeline=test_pipeline, +... deeplake_path="al://activeloop/coco-val", +... deeplake_credentials={ +... "token": TOKEN, +... "creds": None, +... }, +... deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}, +... deeplake_dataloader = {"shuffle": False, "batch_size": 1, 'num_workers': 8} +... ), +... ) +>>> # Which dataloader to use +>>> # Which metrics to use for evaluation. In MMDET (without Deeplake), this is inferred from the dataset type. +>>> # In the Deep Lake integration, since the format is standardized, a variety of metrics can be used for a given dataset. +>>> deeplake_metrics_format = "COCO" +>>> #----------------------------------END DEEPLAKE INPUTS------------------------------------------------------------# + +And the code for training: + +>>> import os +>>> from mmcv import Config +>>> import mmcv +>>> from deeplake.integrations import mmdet as mmdet_deeplake +>>> cfg = Config.fromfile(cfg_file) +>>> cfg.model.bbox_head.num_classes = num_classes +>>> # Build the detector +>>> model = mmdet_deeplake.build_detector(cfg.model) +>>> # Create work_dir +>>> mmcv.mkdir_or_exist(os.path.abspath(cfg.work_dir)) +>>> # Run the training +>>> mmdet_deeplake.train_detector(model, cfg, distributed=args.distributed, validate=args.validate) +""" + +from collections import OrderedDict + +from typing import Callable, Optional, List, Dict, Sequence + +from functools import partial + +import os +import math +import types +import torch +import warnings +import tempfile +import numpy as np +import os.path as osp + +from PIL import Image, ImageDraw # type: ignore + +from terminaltables import AsciiTable # type: ignore + +try: + from mmdet.apis.train import auto_scale_lr # type: ignore +except Exception: + import mmdet # type: ignore + + version = mmdet.__version__ + raise Exception( + f"MMDet {version} version is not supported. The latest supported MMDet version with deeplake is 2.28.1."
+ ) +from mmdet.utils import ( # type: ignore + build_dp, + compat_cfg, + find_latest_checkpoint, + get_root_logger, +) +from mmdet.core import DistEvalHook, EvalHook # type: ignore +from mmdet.core import build_optimizer + +from mmdet.datasets import replace_ImageToTensor # type: ignore + +from mmdet.datasets.builder import PIPELINES # type: ignore +from mmdet.datasets.pipelines import Compose # type: ignore +from mmdet.core import BitmapMasks # type: ignore +from mmdet.core import eval_map, eval_recalls +from mmdet.utils.util_distribution import * # type: ignore +from mmdet.core import BitmapMasks, PolygonMasks + +import mmcv # type: ignore +from mmcv.runner import init_dist # type: ignore +from mmcv.parallel import collate # type: ignore +from mmcv.utils import build_from_cfg, digit_version # type: ignore +from mmcv.utils import print_log +from mmcv.runner import ( # type: ignore + DistSamplerSeedHook, + EpochBasedRunner, + Fp16OptimizerHook, + OptimizerHook, + build_runner, + get_dist_info, +) + +import deeplake as dp +from deeplake.types import TypeKind +from deeplake.integrations.mm.exceptions import ValidationDatasetMissingError + +from deeplake.integrations.mmdet.mmdet_dataset_ import ( + MMDetTorchDataset, + MMDetDataset, + transform, +) +from deeplake.integrations.mm.ipc import _get_free_port +from deeplake.integrations.mm.warnings import always_warn +from deeplake.integrations.mm.get_indexes import get_indexes +from deeplake.integrations.mm.upcast_array import upcast_array +from deeplake.integrations.mm.worker_init_fn import worker_init_fn +from deeplake.integrations.mm.mm_runners import DeeplakeIterBasedRunner +from deeplake.integrations.mm.mm_common import ( + load_ds_from_cfg, + get_collect_keys, + check_persistent_workers, + find_tensor_with_htype, + find_image_tensor, + ddp_setup, + force_cudnn_initialization, + check_unsupported_functionalities, + get_pipeline, +) + +from torch.utils.data import DataLoader + +# Monkey-patch the function +from deeplake.integrations.mmdet.test_ import single_gpu_test as custom_single_gpu_test +from deeplake.integrations.mmdet.test_ import multi_gpu_test as custom_multi_gpu_test + +import mmdet.apis + +mmdet.apis.single_gpu_test = custom_single_gpu_test +mmdet.apis.multi_gpu_test = custom_multi_gpu_test + + +def build_ddp(model, device, *args, **kwargs): + """Build DistributedDataParallel module by device type. + + If device is cuda, return a MMDistributedDataParallel model; + if device is mlu, return a MLUDistributedDataParallel model. + + Args: + model (:class:`nn.Module`): module to be parallelized. + device (str): device type, mlu or cuda. + args (List): arguments to be passed to ddp_factory + kwargs (dict): keyword arguments to be passed to ddp_factory + + Returns: + :class:`nn.Module`: the module to be parallelized + + References: + .. [1] https://pytorch.org/docs/stable/generated/torch.nn.parallel. + DistributedDataParallel.html + """ + + assert device in ["cuda", "mlu"], "Only available for cuda or mlu devices." 
+ if device == "cuda": + model = model.cuda(kwargs["device_ids"][0]) # patch + elif device == "mlu": + from mmcv.device.mlu import MLUDistributedDataParallel # type: ignore + + ddp_factory["mlu"] = MLUDistributedDataParallel + model = model.mlu() + + return ddp_factory[device](model, *args, **kwargs) + + +def mmdet_subiterable_dataset_eval( + self, + *args, + **kwargs, +): + return self.dataset.mmdet_dataset.evaluate(*args, **kwargs) + + +def build_dataloader( + dataset: dp.Dataset, + images_tensor: str, + masks_tensor: Optional[str], + boxes_tensor: str, + labels_tensor: str, + pipeline: List, + mode: str = "train", + **loader_config, +): + poly2mask = False + if masks_tensor is not None: + if dataset.schema[masks_tensor].dtype.kind == TypeKind.Polygon: + poly2mask = True + + bbox_info = dict(dataset[boxes_tensor].metadata) + classes = dataset[labels_tensor].metadata["class_names"] + pipeline = build_pipeline(pipeline) + metrics_format = loader_config.get("metrics_format") + persistent_workers = loader_config.get("persistent_workers", False) + dist = loader_config["dist"] + seed = loader_config["seed"] + + transform_fn = partial( + transform, + images_tensor=images_tensor, + masks_tensor=masks_tensor, + boxes_tensor=boxes_tensor, + labels_tensor=labels_tensor, + pipeline=pipeline, + bbox_info=bbox_info, + poly2mask=poly2mask, + ) + + num_workers = loader_config.get("num_workers") + pin_memory = loader_config.get("pin_memory", False) + if num_workers is None: + num_workers = loader_config["workers_per_gpu"] + + shuffle = loader_config.get("shuffle", True) + tensors_dict = { + "images_tensor": images_tensor, + "boxes_tensor": boxes_tensor, + "labels_tensor": labels_tensor, + } + tensors = [images_tensor, labels_tensor, boxes_tensor] + if masks_tensor is not None: + tensors.append(masks_tensor) + tensors_dict["masks_tensor"] = masks_tensor + + batch_size = loader_config.get("batch_size") + drop_last = loader_config.get("drop_last", False) + if batch_size is None: + batch_size = loader_config["samples_per_gpu"] + + collate_fn = partial(collate, samples_per_gpu=batch_size) + + mmdet_ds = MMDetDataset( + dataset=dataset, + metrics_format=metrics_format, + pipeline=pipeline, + tensors_dict=tensors_dict, + tensors=tensors, + mode=mode, + bbox_info=bbox_info, + num_gpus=loader_config["num_gpus"], + batch_size=batch_size, + ) + + # get_dist_info() falls back to rank 0 / world size 1 when torch.distributed is not + # initialized; rank is also needed by worker_init_fn below when dist is False. + rank, world_size = get_dist_info() + if dist: + sl = get_indexes( + dataset, rank=rank, num_replicas=world_size, drop_last=drop_last + ) + dataset = dataset.query( + f"select * LIMIT {sl.stop - sl.start} OFFSET {sl.start}" + ) + + pytorch_ds = MMDetTorchDataset(dataset, transform=transform_fn) + + init_fn = ( + partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) + if seed is not None + else None + ) + + if digit_version(torch.__version__) >= digit_version("1.8.0"): + loader = DataLoader( + pytorch_ds, + batch_size=batch_size, + sampler=None, + num_workers=num_workers, + collate_fn=collate_fn, + pin_memory=pin_memory, + shuffle=shuffle, + worker_init_fn=init_fn, + drop_last=drop_last, + persistent_workers=persistent_workers, + ) + else: + loader = DataLoader( + pytorch_ds, + batch_size=batch_size, + sampler=None, + num_workers=num_workers, + collate_fn=collate_fn, + pin_memory=pin_memory, + shuffle=shuffle, + worker_init_fn=init_fn, + drop_last=drop_last, + ) + + loader.dataset.mmdet_dataset = mmdet_ds + loader.dataset.pipeline = loader.dataset.mmdet_dataset.pipeline + eval_fn = partial(mmdet_subiterable_dataset_eval, loader) + loader.dataset.evaluate =
eval_fn + loader.dataset.CLASSES = classes + return loader + + +def build_pipeline(steps): + return Compose( + [ + build_from_cfg(step, PIPELINES, None) + for step in steps + if step["type"] not in {"LoadImageFromFile", "LoadAnnotations"} + ] + ) + + +def train_detector( + model, + cfg: mmcv.ConfigDict, + ds_train=None, + ds_train_tensors=None, + ds_val: Optional[dp.Dataset] = None, + ds_val_tensors=None, + distributed: bool = False, + timestamp=None, + meta=None, + validate: bool = True, +): + """ + Creates the runner, then trains and evaluates the model: + Args: + model: model to train, should be built before passing + cfg: mmcv.ConfigDict object containing all necessary configuration. + In cfg we have several changes to support the deeplake integration: + _base_: still serves as a base model to inherit from + data: holds everything related to data processing; you will need to specify the following parameters: + train: everything related to training data, it has the following attributes: + pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet + deeplake_tensors: dictionary that maps mmdet keys to deeplake dataset tensors. Example: `{"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}`. + If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon". + keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels`, `gt_bboxes` are always required; if they are not specified, they + are always searched, while masks are optional; if you specify `gt_masks` in collect, then you need to either specify it in the config or it will be searched based on + `segment_mask` and `polygon` htypes. + deeplake_credentials: dictionary with deeplake credentials that allow you to access the specified data. It has the following argument: `token`. + `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/. + val (Optional): everything related to validation data, it has the following attributes: + pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet + deeplake_tensors: dictionary that maps mmdet keys to deeplake dataset tensors. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}. + If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon". + keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels`, `gt_bboxes` are always required; if they are not specified, they + are always searched, while masks are optional; if you specify `gt_masks` in collect, then you need to either specify it in the config or it will be searched based on + `segment_mask` and `polygon` htypes. + deeplake_credentials: deeplake credentials that allow you to access the specified data. It has the following argument: `token`. + `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+ test (Optional): everything related to testing data, it has the following attributes: + pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet + deeplake_tensors: dictionary that maps mmdet keys to deeplake dataset tensors. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}. + If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon". + keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels`, `gt_bboxes` are always required; if they are not specified, they + are always searched, while masks are optional; if you specify `gt_masks` in collect, then you need to either specify it in the config or it will be searched based on + `segment_mask` and `polygon` htypes. + deeplake_credentials: deeplake credentials that allow you to access the specified data. It has the following argument: `token`. + `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/. + samples_per_gpu: number of samples to be processed per gpu + workers_per_gpu: number of workers per gpu + optimizer: dictionary containing information about optimizer initialization + optimizer_config: some optimizer configuration that might be used during training like grad_clip etc. + runner: training type e.g. EpochBasedRunner, here you can specify the maximum number of epochs to be conducted. For instance: `runner = dict(type='EpochBasedRunner', max_epochs=273)` + ds_train: train dataset of type dp.Dataset. This can be a view of the dataset. + ds_train_tensors: dictionary that maps mmdet keys to deeplake dataset tensors. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}. + If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon". + keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels`, `gt_bboxes` are always required; if they are not specified, they + are always searched, while masks are optional; if you specify `gt_masks` in collect, then you need to either specify it in the config or it will be searched based on + `segment_mask` and `polygon` htypes. + ds_val: validation dataset of type dp.Dataset. This can be a view of the dataset. + ds_val_tensors: dictionary that maps mmdet keys to deeplake dataset tensors. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}. + If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon". + keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels`, `gt_bboxes` are always required; if they are not specified, they + are always searched, while masks are optional; if you specify `gt_masks` in collect, then you need to either specify it in the config or it will be searched based on + `segment_mask` and `polygon` htypes. + evaluation: dictionary that contains all information needed for evaluation apart from data processing, like how often evaluation should be done and what metrics we want to use. In the deeplake + integration you also need to specify what kind of output you want to be printed during evaluation.
For instance, `evaluation = dict(interval=1, metric=['bbox'], metrics_format="COCO")` + distributed: bool, whether ddp training should be started, by default `False` + timestamp: variable used in runner to make .log and .log.json filenames the same + meta: meta data used to build runner + validate: bool, whether validation should be conducted, by default `True` + """ + check_unsupported_functionalities(cfg) + + if not hasattr(cfg, "gpu_ids"): + cfg.gpu_ids = range(torch.cuda.device_count() if distributed else 1) + if distributed: + return torch.multiprocessing.spawn( + _train_detector, + args=( + model, + cfg, + ds_train, + ds_train_tensors, + ds_val, + ds_val_tensors, + distributed, + timestamp, + meta, + validate, + _get_free_port(), + ), + nprocs=len(cfg.gpu_ids), + ) + _train_detector( + 0, + model, + cfg, + ds_train, + ds_train_tensors, + ds_val, + ds_val_tensors, + distributed, + timestamp, + meta, + validate, + ) + + +def _train_detector( + local_rank, + model, + cfg: mmcv.ConfigDict, + ds_train=None, + ds_train_tensors=None, + ds_val: Optional[dp.Dataset] = None, + ds_val_tensors=None, + distributed: bool = False, + timestamp=None, + meta=None, + validate: bool = True, + port=None, +): + batch_size = cfg.data.get("samples_per_gpu", 256) + num_workers = cfg.data.get("workers_per_gpu", 1) + + if ds_train is None: + ds_train = load_ds_from_cfg(cfg.data.train) + ds_train_tensors = cfg.data.train.get("deeplake_tensors", {}) + else: + cfg_data = cfg.data.train.get("deeplake_path") + if cfg_data: + always_warn( + "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_detector. The dataset input to train_detector will be used in the workflow." + ) + + eval_cfg = cfg.get("evaluation", {}) + if ds_train_tensors: + train_images_tensor = ds_train_tensors["img"] + train_boxes_tensor = ds_train_tensors["gt_bboxes"] + train_labels_tensor = ds_train_tensors["gt_labels"] + train_masks_tensor = ds_train_tensors.get("gt_masks") + else: + train_images_tensor = find_image_tensor(ds_train, mm_class="img") + train_boxes_tensor = find_tensor_with_htype( + ds_train, type_kind=TypeKind.BoundingBox, mm_class="gt_bboxes" + ) + train_labels_tensor = find_tensor_with_htype( + ds_train, type_kind=TypeKind.ClassLabel, mm_class="train gt_labels" + ) + train_masks_tensor = None + + collection_keys = get_collect_keys(cfg) + if "gt_masks" in collection_keys: + train_masks_tensor = find_tensor_with_htype( + ds_train, type_kind=TypeKind.BinaryMask, mm_class="gt_masks" + ) or find_tensor_with_htype( + ds_train, type_kind=TypeKind.Polygon, mm_class="gt_masks" + ) + + # TODO verify required tensors are not None and raise Exception. + if hasattr(model, "CLASSES"): + warnings.warn( + "model already has a CLASSES attribute. dataset.info.class_names will not be used."
+ ) + elif "class_names" in dict(ds_train[train_labels_tensor].metadata): + model.CLASSES = ds_train[train_labels_tensor].metadata["class_names"] + + metrics_format = cfg.get("deeplake_metrics_format", "COCO") + + logger = get_root_logger(log_level=cfg.log_level) + + runner_type = "EpochBasedRunner" if "runner" not in cfg else cfg.runner["type"] + + train_dataloader_default_args = dict( + samples_per_gpu=batch_size, + workers_per_gpu=num_workers, + # `num_gpus` will be ignored if distributed + num_gpus=len(cfg.gpu_ids), + dist=distributed, + seed=cfg.seed, + runner_type=runner_type, + metrics_format=metrics_format, + ) + + train_loader_cfg = { + **train_dataloader_default_args, + **cfg.data.get("train_dataloader", {}), + **cfg.data.train.get("deeplake_dataloader", {}), + } + + # put model on gpus + if distributed: + find_unused_parameters = cfg.get("find_unused_parameters", False) + # Sets the `find_unused_parameters` parameter in + # # torch.nn.parallel.DistributedDataParallel + # model = torch.nn.parallel.DistributedDataParallel(model.cuda(), + # device_ids=[local_rank], + # output_device=local_rank, + # broadcast_buffers=False, + # find_unused_parameters=find_unused_parameters) + force_cudnn_initialization(cfg.gpu_ids[local_rank]) + ddp_setup(local_rank, len(cfg.gpu_ids), port) + model = build_ddp( + model, + cfg.device, + device_ids=[cfg.gpu_ids[local_rank]], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters, + ) + else: + model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) + + train_pipeline = get_pipeline(cfg, name="train", generic_name="train_pipeline") + + data_loader = build_dataloader( + ds_train, # TO DO: convert it to a for loop if we will support concatenating several datasets + train_images_tensor, + train_masks_tensor, + train_boxes_tensor, + train_labels_tensor, + pipeline=train_pipeline, + **train_loader_cfg, + ) + # build optimizer + auto_scale_lr(cfg, distributed, logger) + optimizer = build_optimizer(model, cfg.optimizer) + + cfg.custom_imports = dict( + imports=["deeplake.integrations.mm.mm_runners"], + allow_failed_imports=False, + ) + if cfg.runner.type == "IterBasedRunner": + cfg.runner.type = "DeeplakeIterBasedRunner" + elif cfg.runner.type == "EpochBasedRunner": + cfg.runner.type = "DeeplakeEpochBasedRunner" + + runner = build_runner( + cfg.runner, + default_args=dict( + model=model, + optimizer=optimizer, + work_dir=cfg.work_dir, + logger=logger, + meta=meta, + force_cleanup=True, + ), + ) + + # an ugly workaround to make .log and .log.json filenames the same + runner.timestamp = timestamp + + # fp16 setting + fp16_cfg = cfg.get("fp16", None) + if fp16_cfg is not None: + optimizer_config = Fp16OptimizerHook( + **cfg.optimizer_config, **fp16_cfg, distributed=distributed + ) + elif distributed and "type" not in cfg.optimizer_config: + optimizer_config = OptimizerHook(**cfg.optimizer_config) + else: + optimizer_config = cfg.optimizer_config + + # register hooks + runner.register_training_hooks( + cfg.lr_config, + optimizer_config, + cfg.checkpoint_config, + cfg.log_config, + cfg.get("momentum_config", None), + custom_hooks_config=cfg.get("custom_hooks", None), + ) + + if distributed: + if isinstance(runner, EpochBasedRunner): + runner.register_hook(DistSamplerSeedHook()) + + # register eval hooks + if validate: + val_dataloader_default_args = dict( + samples_per_gpu=batch_size, + workers_per_gpu=num_workers, + dist=distributed, + seed=cfg.seed, + shuffle=False, + mode="val", + metrics_format=metrics_format, + num_gpus=len(cfg.gpu_ids), + )
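+ # NOTE: the defaults are merged last below, so they take precedence over any overlapping keys in cfg.data.val's deeplake_dataloader (the reverse of the train loader merge above).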
+ + val_dataloader_args = { + **cfg.data.val.get("deeplake_dataloader", {}), + **val_dataloader_default_args, + } + + train_persistent_workers = train_loader_cfg.get("persistent_workers", False) + val_persistent_workers = val_dataloader_args.get("persistent_workers", False) + check_persistent_workers(train_persistent_workers, val_persistent_workers) + + if val_dataloader_args.get("shuffle", False): + always_warn("shuffle argument for validation dataset will be ignored.") + + if ds_val is None: + cfg_ds_val = cfg.data.get("val") + if cfg_ds_val is None or not any( + cfg_ds_val.get(key) is not None + for key in ["deeplake_path", "deeplake_query"] + ): + raise ValidationDatasetMissingError() + + ds_val = load_ds_from_cfg(cfg.data.val) + ds_val_tensors = cfg.data.val.get("deeplake_tensors", {}) + else: + cfg_data = cfg.data.val.get("deeplake_path") + if cfg_data is not None: + always_warn( + "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_detector. The dataset input to train_detector will be used in the workflow." + ) + + if ds_val is None: + raise ValidationDatasetMissingError() + + if val_dataloader_args["samples_per_gpu"] > 1: + # Replace 'ImageToTensor' to 'DefaultFormatBundle' + cfg.data.val.pipeline = replace_ImageToTensor(cfg.data.val.pipeline) + + if ds_val_tensors: + val_images_tensor = ds_val_tensors["img"] + val_boxes_tensor = ds_val_tensors["gt_bboxes"] + val_labels_tensor = ds_val_tensors["gt_labels"] + val_masks_tensor = ds_val_tensors.get("gt_masks") + else: + val_images_tensor = find_image_tensor(ds_val, mm_class="img") + val_boxes_tensor = find_tensor_with_htype( + ds_val, type_kind=TypeKind.BoundingBox, mm_class="gt_bboxes" + ) + val_labels_tensor = find_tensor_with_htype( + ds_val, type_kind=TypeKind.ClassLabel, mm_class="gt_labels" + ) + val_masks_tensor = None + + collection_keys = get_collect_keys(cfg) + if "gt_masks" in collection_keys: + val_masks_tensor = find_tensor_with_htype( + ds_val, type_kind=TypeKind.BinaryMask, mm_class="gt_masks" + ) or find_tensor_with_htype( + ds_val, type_kind=TypeKind.Polygon, mm_class="gt_masks" + ) + + # TODO make sure required tensors are not None. + val_pipeline = get_pipeline(cfg, name="val", generic_name="test_pipeline") + + val_dataloader = build_dataloader( + ds_val, + val_images_tensor, + val_masks_tensor, + val_boxes_tensor, + val_labels_tensor, + pipeline=val_pipeline, + **val_dataloader_args, + ) + + eval_cfg["by_epoch"] = cfg.runner["type"] != "DeeplakeIterBasedRunner" + eval_hook = EvalHook + if distributed: + eval_hook = DistEvalHook + # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the + # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'. 
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority="LOW") + + resume_from = None + if cfg.resume_from is None and cfg.get("auto_resume"): + resume_from = find_latest_checkpoint(cfg.work_dir) + if resume_from is not None: + cfg.resume_from = resume_from + + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner.run([data_loader], cfg.workflow) diff --git a/python/deeplake/integrations/mmdet/mmdet_dataset_.py b/python/deeplake/integrations/mmdet/mmdet_dataset_.py new file mode 100644 index 0000000000..eaa4183976 --- /dev/null +++ b/python/deeplake/integrations/mmdet/mmdet_dataset_.py @@ -0,0 +1,823 @@ +from collections import OrderedDict +from typing import Callable, Optional, List, Dict, Sequence + +import os +import math +import types +import torch +import warnings +import tempfile +import numpy as np +import os.path as osp + +from PIL import Image, ImageDraw # type: ignore + +from terminaltables import AsciiTable # type: ignore + +try: + from mmdet.apis.train import auto_scale_lr # type: ignore +except Exception: + import mmdet # type: ignore + + version = mmdet.__version__ + raise Exception( + f"MMDet {version} version is not supported. The latest supported MMDet version with deeplake is 2.28.1." + ) + +from mmdet.core import eval_map, eval_recalls +from mmdet.core import BitmapMasks, PolygonMasks + +import mmcv # type: ignore +from mmcv.utils import print_log + +import deeplake as dp +from deeplake.types import TypeKind + +from deeplake.integrations.mm.upcast_array import upcast_array +from deeplake.integrations.mm.warnings import always_warn +from deeplake.integrations.mmdet import mmdet_utils_ + +from torch.utils.data import DataLoader + +# Monkey-patch the function +from deeplake.integrations.mm.exceptions import InvalidImageError +from deeplake.integrations.mmdet.test_ import single_gpu_test as custom_single_gpu_test +from deeplake.integrations.mmdet.test_ import multi_gpu_test as custom_multi_gpu_test + +from torch.utils.data import Dataset + + +def coco_pixel_2_pascal_pixel(boxes, shape): + """ + Converts bounding boxes from COCO pixel format (x, y, width, height) + to Pascal VOC pixel format (x_min, y_min, x_max, y_max). + + An empty input is returned as an empty (0, 4) array. + + @param boxes: numpy array of shape (N, 4), containing bounding boxes in COCO format. + @param shape: tuple, the shape of the image (height, width). + + @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format. + """ + pascal_boxes = np.empty((0, 4), dtype=boxes.dtype) + if boxes.size != 0: + pascal_boxes = np.stack( + ( + boxes[:, 0], + boxes[:, 1], + boxes[:, 0] + boxes[:, 2], + boxes[:, 1] + boxes[:, 3], + ), + axis=1, + ) + return pascal_boxes + + +def poly_2_mask(polygons, shape): + # TODO: this doesn't fill the array in place: out = np.zeros(shape + (len(polygons),), dtype=np.uint8) + """ + Converts a list of polygons into a binary mask. + + @param polygons: list of polygons, where each polygon is a list of (x, y) coordinates. + @param shape: tuple, the shape of the mask (height, width). + + @return: numpy array, binary mask of the same size as the image.
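+
+    Example (a hypothetical 4x4 mask with a single triangle polygon):
+
+    ```python
+    polygons = [[(0, 0), (3, 0), (0, 3)]]
+    mask = poly_2_mask(polygons, (4, 4))  # -> shape (4, 4, 1), dtype uint8
+    ```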
+ """ + out = np.zeros(shape + (len(polygons),), dtype=np.uint8) + for i, polygon in enumerate(polygons): + im = Image.fromarray(out[..., i]) + d = ImageDraw.Draw(im) + d.polygon(polygon, fill=1) + out[..., i] = np.asarray(im) + return out + + +def coco_frac_2_pascal_pixel(boxes, shape): + """ + Converts bounding boxes from fractional COCO format (relative to image size) + to Pascal VOC pixel format. + + @param boxes: numpy array of shape (N, 4), bounding boxes in fractional COCO format. + @param shape: tuple, the shape of the image (height, width). + + @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format. + """ + bbox = np.empty((0, 4), dtype=boxes.dtype) + if boxes.size != 0: + x = boxes[:, 0] * shape[1] + y = boxes[:, 1] * shape[0] + w = boxes[:, 2] * shape[1] + h = boxes[:, 3] * shape[0] + bbox = np.stack((x, y, w, h), axis=1) + return coco_pixel_2_pascal_pixel(bbox, shape) + + +def pascal_frac_2_pascal_pixel(boxes, shape): + """ + Converts bounding boxes from fractional Pascal VOC format (LTRB) + to pixel Pascal VOC format. + + @param boxes: numpy array of shape (N, 4), bounding boxes in fractional format. + @param shape: tuple, the shape of the image (height, width). + + @return: numpy array of shape (N, 4), bounding boxes in pixel format. + """ + bbox = np.empty((0, 4), dtype=boxes.dtype) + if boxes.size != 0: + x_top = boxes[:, 0] * shape[1] + y_top = boxes[:, 1] * shape[0] + x_bottom = boxes[:, 2] * shape[1] + y_bottom = boxes[:, 3] * shape[0] + bbox = np.stack((x_top, y_top, x_bottom, y_bottom), axis=1) + return bbox + + +def yolo_pixel_2_pascal_pixel(boxes, shape): + """ + Converts bounding boxes from YOLO pixel format (center_x, center_y, width, height) + to Pascal VOC pixel format (LTRB). + + @param boxes: numpy array of shape (N, 4), bounding boxes in YOLO format. + @param shape: tuple, the shape of the image (height, width). + + @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format. + """ + bbox = np.empty((0, 4), dtype=boxes.dtype) + if boxes.size != 0: + x_top = np.array(boxes[:, 0]) - np.floor(np.array(boxes[:, 2]) / 2) + y_top = np.array(boxes[:, 1]) - np.floor(np.array(boxes[:, 3]) / 2) + x_bottom = np.array(boxes[:, 0]) + np.floor(np.array(boxes[:, 2]) / 2) + y_bottom = np.array(boxes[:, 1]) + np.floor(np.array(boxes[:, 3]) / 2) + bbox = np.stack((x_top, y_top, x_bottom, y_bottom), axis=1) + return bbox + + +def yolo_frac_2_pascal_pixel(boxes, shape): + """ + Converts bounding boxes from YOLO fractional format to Pascal VOC pixel format. + + @param boxes: numpy array of shape (N, 4), bounding boxes in YOLO fractional format. + @param shape: tuple, the shape of the image (height, width). + + @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format. 
+ """ + bbox = np.empty((0, 4), dtype=boxes.dtype) + if boxes.size != 0: + x_center = boxes[:, 0] * shape[1] + y_center = boxes[:, 1] * shape[0] + width = boxes[:, 2] * shape[1] + height = boxes[:, 3] * shape[0] + bbox = np.stack((x_center, y_center, width, height), axis=1) + return yolo_pixel_2_pascal_pixel(bbox, shape) + + +def get_bbox_format(bbox, bbox_info): + bbox_info = bbox_info.get("coords") + if not bbox_info: + bbox_info = {} + mode = bbox_info.get("mode", "LTWH") + type = bbox_info.get("type", "pixel") + + if len(bbox_info) == 0 and np.mean(bbox) < 1: + mode = "CCWH" + type = "fractional" + return (mode, type) + + +BBOX_FORMAT_TO_PASCAL_CONVERTER = { + ("LTWH", "pixel"): coco_pixel_2_pascal_pixel, + ("LTWH", "fractional"): coco_frac_2_pascal_pixel, + ("LTRB", "pixel"): lambda x, y: x, + ("LTRB", "fractional"): pascal_frac_2_pascal_pixel, + ("CCWH", "pixel"): yolo_pixel_2_pascal_pixel, + ("CCWH", "fractional"): yolo_frac_2_pascal_pixel, +} + + +def convert_to_pascal_format(bbox, bbox_info, shape): + bbox_format = get_bbox_format(bbox, bbox_info) + converter = BBOX_FORMAT_TO_PASCAL_CONVERTER[bbox_format] + return converter(bbox, shape) + + +def pascal_pixel_2_coco_pixel(boxes, images): + """ + Converts bounding boxes from Pascal VOC pixel format (LTRB) + to COCO pixel format (x, y, width, height). + + @param boxes: numpy array of images (N, 4), bounding boxes in Pascal VOC format. + @param images: tuple, the images of the image (height, width). + + @return: numpy array of images (N, 4), bounding boxes in COCO pixel format. + """ + pascal_boxes = [] + for box in boxes: + if box.size != 0: + pascal_boxes.append( + np.stack( + ( + box[:, 0], + box[:, 1], + box[:, 2] - box[:, 0], + box[:, 3] - box[:, 1], + ), + axis=1, + ) + ) + else: + pascal_boxes.append(box) + return pascal_boxes + + +def pascal_frac_2_coco_pixel(boxes, images): + pascal_pixel_boxes = [] + for i, box in enumerate(boxes): + if box.size != 0: + shape = images[i].shape + x_top = box[:, 0] * shape[1] + y_top = box[:, 1] * shape[0] + x_bottom = box[:, 2] * shape[1] + y_bottom = box[:, 3] * shape[0] + bbox = np.stack((x_top, y_top, x_bottom, y_bottom), axis=1) + pascal_pixel_boxes.append(bbox) + return pascal_pixel_2_coco_pixel(pascal_pixel_boxes, images) + + +def yolo_pixel_2_coco_pixel(boxes, images): + yolo_boxes = [] + for box in boxes: + if box.size != 0: + x_top = np.array(box[:, 0]) - np.floor(np.array(box[:, 2]) / 2) + y_top = np.array(box[:, 1]) - np.floor(np.array(box[:, 3]) / 2) + w = box[:, 2] + h = box[:, 3] + bbox = np.stack([x_top, y_top, w, h], axis=1) + yolo_boxes.append(bbox) + return yolo_boxes + + +def yolo_frac_2_coco_pixel(boxes, images): + yolo_boxes = [] + for i, box in enumerate(boxes): + shape = images[i].shape + x_center = box[:, 0] * shape[1] + y_center = box[:, 1] * shape[0] + width = box[:, 2] * shape[1] + height = box[:, 3] * shape[0] + bbox = np.stack((x_center, y_center, width, height), axis=1) + yolo_boxes.append(bbox) + return yolo_pixel_2_coco_pixel(yolo_boxes, images) + + +def coco_frac_2_coco_pixel(boxes, images): + coco_pixel_boxes = [] + for i, box in enumerate(boxes): + shape = images[i].shape + x = box[:, 0] * shape[1] + y = box[:, 1] * shape[0] + w = box[:, 2] * shape[1] + h = box[:, 3] * shape[0] + bbox = np.stack((x, y, w, h), axis=1) + coco_pixel_boxes.append(bbox) + return np.array(coco_pixel_boxes) + + +BBOX_FORMAT_TO_COCO_CONVERTER = { + ("LTWH", "pixel"): lambda x, y: x, + ("LTWH", "fractional"): coco_frac_2_coco_pixel, + ("LTRB", "pixel"): pascal_pixel_2_coco_pixel, + 
("LTRB", "fractional"): pascal_frac_2_coco_pixel, + ("CCWH", "pixel"): yolo_pixel_2_coco_pixel, + ("CCWH", "fractional"): yolo_frac_2_coco_pixel, +} + + +def convert_to_coco_format(bbox, bbox_format, images): + converter = BBOX_FORMAT_TO_COCO_CONVERTER[bbox_format] + return converter(bbox, images) + + +def first_non_empty(bboxes): + for box in bboxes: + if len(box): + return box + raise ValueError("Empty bboxes") + + +def transform( + sample_in, + images_tensor: str, + masks_tensor: str, + boxes_tensor: str, + labels_tensor: str, + pipeline: Callable, + bbox_info: str, + poly2mask: bool, +): + img = upcast_array(sample_in[images_tensor]) + if not isinstance(img, np.ndarray): + img = np.array(img) + + bboxes = upcast_array(sample_in[boxes_tensor]) + # TODO bbox format should be recognized outside the transform, not per sample basis. + bboxes = convert_to_pascal_format(bboxes, bbox_info, img.shape) + if bboxes.shape == (0, 0): # TO DO: remove after bug will be fixed + bboxes = np.empty((0, 4), dtype=sample_in[boxes_tensor].dtype) + + labels = upcast_array(sample_in[labels_tensor]) + + if img.ndim == 2: + img = np.expand_dims(img, -1) + + img = img[..., ::-1] # rgb_to_bgr should be optional + if img.shape[2] == 1: + img = np.repeat(img, 3, axis=2) + shape = img.shape + + pipeline_dict = { + "img": np.ascontiguousarray(img, dtype=np.float32), + "img_fields": ["img"], + "filename": None, + "ori_filename": None, + "img_shape": shape, + "ori_shape": shape, + "gt_bboxes": bboxes, + "gt_labels": labels, + "bbox_fields": ["gt_bboxes"], + } + + if masks_tensor: + masks = upcast_array(sample_in[masks_tensor]) + if poly2mask: + masks = mmdet_utils_.convert_poly_to_coco_format(masks) + masks = PolygonMasks( + [process_polygons(polygons) for polygons in masks], shape[0], shape[1] + ) + else: + masks = BitmapMasks(masks.astype(np.uint8).transpose(2, 0, 1), *shape[:2]) + + pipeline_dict["gt_masks"] = masks + pipeline_dict["mask_fields"] = ["gt_masks"] + return pipeline(pipeline_dict) + + +def process_polygons(polygons): + """Convert polygons to list of ndarray and filter invalid polygons. + + Args: + polygons (list[list]): Polygons of one instance. + + Returns: + list[numpy.ndarray]: Processed polygons. 
+    """
+
+    polygons = [np.array(p) for p in polygons]
+    valid_polygons = []
+    for polygon in polygons:
+        # a valid polygon has an even number of coordinates and at least 3 points
+        if len(polygon) % 2 == 0 and len(polygon) >= 6:
+            valid_polygons.append(polygon)
+    return valid_polygons
+
+
+class MMDetTorchDataset(Dataset):
+    def __init__(
+        self,
+        dataset,
+        tensors: Optional[Sequence[str]] = None,
+        transform: Optional[Callable] = None,
+    ) -> None:
+        super().__init__()
+        self.dataset = dataset
+        self.transform = transform
+        self.column_names = [col.name for col in self.dataset.schema.columns]
+        self.last_successful_index = -1
+
+    def __getstate__(self):
+        return {
+            "dataset": self.dataset,
+            "transform": self.transform,
+            "column_names": self.column_names,
+            "last_successful_index": self.last_successful_index,
+        }
+
+    def __setstate__(self, state):
+        """Restore state from pickled state."""
+        if hasattr(super(), "__setstate__"):
+            super().__setstate__(state)
+
+        self.dataset = state["dataset"]
+        self.transform = state["transform"]
+        self.column_names = state["column_names"]
+        self.last_successful_index = state["last_successful_index"]
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, idx):
+        while True:
+            try:
+                sample = self.dataset[idx]
+                if self.transform:
+                    out = self.transform(sample)
+                else:
+                    out = {}
+                    for col in self.column_names:
+                        out[col] = sample[col]
+                self.last_successful_index = idx
+                return out
+            except InvalidImageError as e:
+                print(f"Error processing data at index {idx}: {e}")
+                # Fall back to the last sample that loaded cleanly; if none has
+                # loaded yet, skip forward instead of retrying the bad index.
+                if self.last_successful_index == -1:
+                    idx = idx + 1
+                else:
+                    idx = self.last_successful_index
+                continue
+
+
+class MMDetDataset(MMDetTorchDataset):
+    def __init__(
+        self,
+        *args,
+        tensors_dict=None,
+        mode="train",
+        metrics_format="COCO",
+        bbox_info=None,
+        pipeline=None,
+        num_gpus=1,
+        batch_size=1,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.mode = mode
+        self.pipeline = pipeline
+        self.num_gpus = num_gpus
+        self.batch_size = batch_size
+        self.tensors_dict = tensors_dict
+        self.bbox_info = bbox_info
+        self.metrics_format = metrics_format
+        if self.mode in ("val", "test"):
+            self.images = self._get_images(self.tensors_dict["images_tensor"])
+            masks = self._get_masks(self.tensors_dict.get("masks_tensor", None))
+            masks_type_kind = (
+                self.dataset.schema[masks.name].dtype.kind
+                if masks is not None and masks != []
+                else None
+            )
+            self.masks_type_kind = masks_type_kind
+            self.masks = masks[:]
+            self.bboxes = self._get_bboxes(self.tensors_dict["boxes_tensor"])
+            bbox_format = get_bbox_format(first_non_empty(self.bboxes), bbox_info)
+            self.labels = self._get_labels(self.tensors_dict["labels_tensor"])
+            self.iscrowds = self._get_iscrowds(self.tensors_dict.get("iscrowds"))
+            self.CLASSES = self.get_classes(self.tensors_dict["labels_tensor"])
+            coco_style_bbox = convert_to_coco_format(
+                self.bboxes, bbox_format, self.images
+            )
+
+            if self.metrics_format == "COCO":
+                self.evaluator = mmdet_utils_.COCODatasetEvaluater(
+                    pipeline,
+                    classes=self.CLASSES,
+                    deeplake_dataset=self.dataset,
+                    imgs=self.images,
+                    masks=self.masks,
+                    masks_type_kind=self.masks_type_kind,
+                    bboxes=coco_style_bbox,
+                    labels=self.labels,
+                    iscrowds=self.iscrowds,
+                    bbox_format=bbox_format,
+                    num_gpus=num_gpus,
+                )
+            else:
+                self.evaluator = None
+
+    def __getstate__(self):
+        """Prepare state for pickling."""
+        state = super().__getstate__() if hasattr(super(), "__getstate__") else {}
+
+        state.update(
+            {
+                "mode": self.mode,
+                "pipeline": self.pipeline,
+                "num_gpus": self.num_gpus,
+                "batch_size": self.batch_size,
+                "tensors_dict": self.tensors_dict,
"bbox_info": self.bbox_info, + } + ) + return state + + def __setstate__(self, state): + """Restore state from pickled state.""" + if hasattr(super(), "__setstate__"): + super().__setstate__(state) + + self.mode = state["mode"] + self.pipeline = state["pipeline"] + self.num_gpus = state["num_gpus"] + self.batch_size = state["batch_size"] + self.tensors_dict = state["tensors_dict"] + self.bbox_info = state["bbox_info"] + + if self.mode in ("val", "test"): + self.images = self._get_images(self.tensors_dict["images_tensor"]) + masks = self._get_masks(self.tensors_dict.get("masks_tensor", None)) + masks_type_kind = ( + self.dataset.schema[masks.name].dtype.kind + if masks is not None and masks != [] + else None + ) + self.masks_type_kind = masks_type_kind + self.masks = masks[:] + self.bboxes = self._get_bboxes(self.tensors_dict["boxes_tensor"]) + bbox_format = get_bbox_format(first_non_empty(self.bboxes), bbox_info) + self.labels = self._get_labels(self.tensors_dict["labels_tensor"]) + self.iscrowds = self._get_iscrowds(self.tensors_dict.get("iscrowds")) + self.CLASSES = self.get_classes(self.tensors_dict["labels_tensor"]) + self.metrics_format = metrics_format + coco_style_bbox = convert_to_coco_format( + self.bboxes, bbox_format, self.images + ) + + if self.metrics_format == "COCO": + self.evaluator = mmdet_utils_.COCODatasetEvaluater( + pipeline, + classes=self.CLASSES, + deeplake_dataset=self.dataset, + imgs=self.images, + masks=self.masks, + masks_type_kind=self.masks_type_kind, + bboxes=coco_style_bbox, + labels=self.labels, + iscrowds=self.iscrowds, + bbox_format=bbox_format, + num_gpus=num_gpus, + ) + else: + self.evaluator = None + + def __len__(self): + if self.mode == "val": + per_gpu_length = math.floor( + len(self.dataset) / (self.batch_size * self.num_gpus) + ) + total_length = per_gpu_length * self.num_gpus + return total_length + return super().__len__() + + def _get_images(self, images_tensor): + image_tensor = self.dataset[images_tensor] + return image_tensor + + def _get_masks(self, masks_tensor): + if masks_tensor is None: + return [] + return self.dataset[masks_tensor] + + def _get_iscrowds(self, iscrowds_tensor): + if iscrowds_tensor is not None: + return iscrowds_tensor + + if "iscrowds" in [col.name for col in self.dataset.schema.columns]: + always_warn( + "Iscrowds was not specified, searching for iscrowds tensor in the dataset." + ) + return self.dataset["iscrowds"][:] + always_warn("iscrowds tensor was not found, setting its value to 0.") + return iscrowds_tensor + + def _get_bboxes(self, boxes_tensor): + return self.dataset[boxes_tensor][:] + + def _get_labels(self, labels_tensor): + return self.dataset[labels_tensor][:] + + def _get_class_names(self, labels_tensor): + return self.dataset[labels_tensor].metadata["class_names"] + + def get_ann_info(self, idx): + """Get annotation by index. + + Args: + idx (int): Index of data. + + Raises: + ValueError: when ``self.metrics`` is not valid. + + Returns: + dict: Annotation info of specified index. + """ + bboxes = convert_to_pascal_format( + self.bboxes[idx], self.bbox_info, self.images[idx].shape + ) + return {"bboxes": bboxes, "labels": self.labels[idx]} + + def get_cat_ids(self, idx): + """Get category ids by index. + + Args: + idx (int): Index of data. + + Returns: + list[int]: All categories in the image of specified index. 
+
+    def get_cat_ids(self, idx):
+        """Get category ids by index.
+
+        Args:
+            idx (int): Index of data.
+
+        Returns:
+            list[int]: All categories in the image of specified index.
+        """
+
+        cat_ids = self.labels[idx].astype(int).tolist()
+
+        return cat_ids
+
+    def _filter_imgs(self, min_size=32):
+        """Filter out images that are too small."""
+        if self.filter_empty_gt:
+            warnings.warn("CustomDataset does not support filtering empty gt images.")
+        valid_inds = []
+        for i, img_info in enumerate(self.data_infos):
+            if min(img_info["width"], img_info["height"]) >= min_size:
+                valid_inds.append(i)
+        return valid_inds
+
+    def get_classes(self, classes):
+        """Get class names of current dataset.
+
+        Args:
+            classes (str): Represents the name of the classes tensor. Overrides the CLASSES defined by the dataset.
+
+        Returns:
+            list[str]: Names of categories of the dataset.
+        """
+        return self.dataset[classes].metadata["class_names"]
+
+    def evaluate(
+        self,
+        results,
+        metric="mAP",
+        logger=None,
+        proposal_nums=(100, 300, 1000),
+        iou_thr=0.5,
+        scale_ranges=None,
+        **kwargs,
+    ):
+        """Evaluate the dataset.
+
+        Args:
+            results (list): Testing results of the dataset.
+            metric (str | list[str]): Metrics to be evaluated.
+            logger (logging.Logger | None | str): Logger used for printing
+                related information during evaluation. Default: None.
+            proposal_nums (Sequence[int]): Proposal number used for evaluating
+                recalls, such as recall@100, recall@1000.
+                Default: (100, 300, 1000).
+            iou_thr (float | list[float]): IoU threshold. Default: 0.5.
+            scale_ranges (list[tuple] | None): Scale ranges for evaluating mAP.
+                Default: None.
+            **kwargs (dict): Keyword arguments passed through to the underlying
+                evaluator.
+
+        Raises:
+            KeyError: if a specified metric format is not supported
+
+        Returns:
+            OrderedDict: Evaluation metrics dictionary
+        """
+        if self.num_gpus > 1:
+            results_ordered = []
+            for i in range(self.num_gpus):
+                results_ordered += results[i :: self.num_gpus]
+            results = results_ordered
+
+        if self.evaluator is None:
+            if not isinstance(metric, str):
+                assert len(metric) == 1
+                metric = metric[0]
+            allowed_metrics = ["mAP", "recall"]
+            if metric not in allowed_metrics:
+                raise KeyError(f"metric {metric} is not supported")
+            annotations = [
+                self.get_ann_info(i) for i in range(len(self))
+            ]  # directly evaluate from hub
+            eval_results = OrderedDict()
+            iou_thrs = [iou_thr] if isinstance(iou_thr, float) else iou_thr
+            if metric == "mAP":
+                assert isinstance(iou_thrs, list)
+                mean_aps = []
+                for iou_thr in iou_thrs:
+                    print_log(f'\n{"-" * 15}iou_thr: {iou_thr}{"-" * 15}')
+                    mean_ap, _ = eval_map(
+                        results,
+                        annotations,
+                        scale_ranges=scale_ranges,
+                        iou_thr=iou_thr,
+                        dataset=self.CLASSES,
+                        logger=logger,
+                    )
+                    mean_aps.append(mean_ap)
+                    eval_results[f"AP{int(iou_thr * 100):02d}"] = round(mean_ap, 3)
+                eval_results["mAP"] = sum(mean_aps) / len(mean_aps)
+            elif metric == "recall":
+                gt_bboxes = [ann["bboxes"] for ann in annotations]  # evaluate from hub
+                recalls = eval_recalls(
+                    gt_bboxes, results, proposal_nums, iou_thr, logger=logger
+                )
+                for i, num in enumerate(proposal_nums):
+                    for j, iou in enumerate(iou_thrs):
+                        eval_results[f"recall@{num}@{iou}"] = recalls[i, j]
+                if recalls.shape[1] > 1:
+                    ar = recalls.mean(axis=1)
+                    for i, num in enumerate(proposal_nums):
+                        eval_results[f"AR@{num}"] = ar[i]
+            return eval_results
+
+        return self.evaluator.evaluate(
+            results,
+            metric=metric,
+            logger=logger,
+            proposal_nums=proposal_nums,
+            **kwargs,
+        )
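For context on `evaluate`'s inputs: in the fallback path above, `results` follows the standard MMDetection convention of one entry per image, where each entry is a per-class list of `(k, 5)` arrays holding `x1, y1, x2, y2, score`. A hedged sketch; the `ds_val_mmdet` instance and the detections are hypothetical:

```python
import numpy as np

# Hypothetical detections for a 2-image, 2-class validation set.
results = [
    [  # image 0
        np.array([[40.0, 60.0, 60.0, 140.0, 0.9]]),  # class 0: one box + score
        np.zeros((0, 5)),                            # class 1: no detections
    ],
    [  # image 1
        np.zeros((0, 5)),
        np.array([[10.0, 10.0, 50.0, 50.0, 0.75]]),
    ],
]

# ds_val_mmdet: an MMDetDataset constructed in "val" mode.
metrics = ds_val_mmdet.evaluate(results, metric="mAP")
```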
+
+    @staticmethod
+    def _coco_2_pascal(boxes):
+        # Convert LTWH boxes to Pascal VOC LTRB format (no clipping is performed here)
+        return np.stack(
+            (
+                boxes[:, 0],
+                boxes[:, 1],
+                boxes[:, 0] + boxes[:, 2],
+                boxes[:, 1] + boxes[:, 3],
+            ),
+            axis=1,
+        )
+
+    def __repr__(self):
+        """Print the number of images and the per-category instance counts."""
+        dataset_type = "Test"
+        # if self.test_mode else "Train"
+        result = (
+            f"\n{self.__class__.__name__} {dataset_type} dataset "
+            f"with number of images {len(self)}, "
+            f"and instance counts: \n"
+        )
+        if self.CLASSES is None:
+            result += "Category names are not provided. \n"
+            return result
+        instance_count = np.zeros(len(self.CLASSES) + 1).astype(int)
+        # count the instance number in each image
+        for idx in range(len(self)):
+            label = self.get_ann_info(idx)["labels"]  # change this
+            unique, counts = np.unique(label, return_counts=True)
+            if len(unique) > 0:
+                # add the occurrence number to each class
+                instance_count[unique] += counts
+            else:
+                # background is the last index
+                instance_count[-1] += 1
+        # create a table with category count
+        table_data = [["category", "count"] * 5]
+        row_data = []
+        for cls, count in enumerate(instance_count):
+            if cls < len(self.CLASSES):
+                row_data += [f"{cls} [{self.CLASSES[cls]}]", f"{count}"]
+            else:
+                # add the background number
+                row_data += ["-1 background", f"{count}"]
+            if len(row_data) == 10:
+                table_data.append(row_data)
+                row_data = []
+        if len(row_data) >= 2:
+            if row_data[-1] == "0":
+                row_data = row_data[:-2]
+            if len(row_data) >= 2:
+                table_data.append([])
+                table_data.append(row_data)
+
+        table = AsciiTable(table_data)
+        result += table.table
+        return result
+
+    def format_results(self, results, jsonfile_prefix=None, **kwargs):
+        """Format the results to json (standard format for COCO evaluation).
+
+        Args:
+            results (list[tuple | numpy.ndarray]): Testing results of the
+                dataset.
+            jsonfile_prefix (str | None): The prefix of json files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            kwargs (dict): Additional keyword arguments to be passed.
+
+        Returns:
+            tuple: (result_files, tmp_dir), result_files is a dict containing
+                the json filepaths, tmp_dir is the temporal directory created
+                for saving json files when jsonfile_prefix is not specified.
+        """
+        assert isinstance(results, list), "results must be a list"
+        assert len(results) == len(
+            self
+        ), "The length of results is not equal to the dataset len: {} != {}".format(
+            len(results), len(self)
+        )
+
+        if jsonfile_prefix is None:
+            tmp_dir = tempfile.TemporaryDirectory()
+            jsonfile_prefix = osp.join(tmp_dir.name, "results")
+        else:
+            tmp_dir = None
+        result_files = self.results2json(results, jsonfile_prefix)
+        return result_files, tmp_dir
diff --git a/python/deeplake/integrations/mmdet/mmdet_utils_.py b/python/deeplake/integrations/mmdet/mmdet_utils_.py
new file mode 100644
index 0000000000..741a1128a8
--- /dev/null
+++ b/python/deeplake/integrations/mmdet/mmdet_utils_.py
@@ -0,0 +1,498 @@
+import time
+import warnings
+import pycocotools  # type: ignore
+import numpy as np
+import copy
+import itertools
+import pycocotools.mask as maskUtils  # type: ignore
+from pycocotools import coco as pycocotools_coco  # type: ignore
+from pycocotools import mask as _mask
+from collections import defaultdict
+import sys
+from typing import Union, Dict, List
+
+PYTHON_VERSION = sys.version_info[0]
+if PYTHON_VERSION == 2:
+    from urllib import urlretrieve  # type: ignore
+elif PYTHON_VERSION == 3:
+    from urllib.request import urlretrieve
+from mmdet.datasets import coco as mmdet_coco  # type: ignore
+from mmdet.datasets import pipelines
+from deeplake.integrations.mm.warnings import always_warn
+from deeplake.types import TypeKind
+import json
+import mmcv  # type: ignore
+import math
+from tqdm import tqdm
+
+
+def _isArrayLike(obj):
+    return hasattr(obj, "__iter__") and hasattr(obj, "__len__")
+
+
+class _COCO(pycocotools_coco.COCO):
+    def __init__(
+        self,
+        deeplake_dataset=None,
+        imgs=None,
+        masks=None,
+        masks_type_kind=None,
+        bboxes=None,
+        labels=None,
+        iscrowds=None,
+        class_names=None,
+        bbox_format=("LTRB", "pixel"),
+    ):
+        """
+        Constructor of the COCO helper class, adapted to read and index
+        annotations directly from a Deep Lake dataset instead of a COCO
+        annotation file.
+        :param deeplake_dataset: Deep Lake dataset holding the annotations
+        :param imgs: image tensor
+        :param masks: mask tensor (binary masks or polygons), optional
+        :param masks_type_kind: type kind of the masks tensor
+        :param bboxes: bounding-box tensor
+        :param labels: label tensor
+        :param iscrowds: optional iscrowd tensor
+        :param class_names: list of category names
+        :param bbox_format: (mode, type) tuple describing the bbox format
+        :return:
+        """
+        self.masks = masks
+        self.masks_type_kind = masks_type_kind
+        self.bboxes = bboxes
+        self.labels = labels
+        self.imgs_orig = imgs
+        self.iscrowds = iscrowds
+        self.class_names = class_names
+        self.bbox_format = bbox_format
+
+        # load dataset
+        self.anns, self.cats, self.imgs = dict(), dict(), dict()
+        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
+        print("loading annotations into memory...")
+        self.dataset = deeplake_dataset
+        if self.dataset is not None:
+            self.createDeeplakeIndex()
+
+    def createDeeplakeIndex(self):
+        # create index
+        print("creating index...")
+        anns, cats, imgs = {}, {}, {}
+        imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
+        absolute_id = 0
+        all_categories = self.labels
+        all_bboxes = self.bboxes
+        all_masks = self.masks
+        all_imgs = self.imgs_orig
+        all_iscrowds = self.iscrowds
+
+        for row_index, row in tqdm(
+            enumerate(self.dataset),
+            desc="loading annotations",
+            total=len(self.dataset),
+        ):
+            if all_imgs[row_index].size == 0:
+                always_warn(
+                    "found empty image, skipping it. Please verify that your dataset is not corrupted."
+                )
+                continue
+            categories = all_categories[row_index]  # make referencing custom
+            bboxes = all_bboxes[row_index]
+            if all_masks != [] and all_masks is not None:
+                masks = all_masks[row_index]
+            else:
+                masks = None
+            if all_iscrowds is not None:
+                is_crowds = all_iscrowds[row_index]
+            else:
+                is_crowds = np.zeros_like(categories)
+            img = {
+                "id": row_index,
+                "height": all_imgs[row_index].shape[0],
+                "width": all_imgs[row_index].shape[1],
+            }
+            imgs[row_index] = img
+            for bbox_index, bbox in enumerate(bboxes):
+                if self.masks is not None and self.masks != []:
+                    if self.masks_type_kind == TypeKind.BinaryMask:
+                        if masks.size == 0:
+                            mask = _mask.encode(np.asfortranarray(masks[:]))
+                        else:
+                            mask = _mask.encode(
+                                np.asfortranarray(masks[..., bbox_index])
+                            )
+
+                    elif self.masks_type_kind == TypeKind.Polygon:
+                        mask = convert_poly_to_coco_format(masks[:][bbox_index])
+                    else:
+                        raise Exception(
+                            f"type_kind={self.masks_type_kind} is not supported yet."
+                        )
+                ann = {
+                    "image_id": row_index,
+                    "id": absolute_id,
+                    "category_id": categories[bbox_index],
+                    "bbox": bbox,
+                    "area": bbox[2] * bbox[3],
+                    "segmentation": (
+                        mask if masks is not None else None
+                    ),  # optimize here
+                    "iscrowd": int(is_crowds[bbox_index]),
+                }
+
+                imgToAnns[row_index].append(ann)
+                anns[absolute_id] = ann
+                absolute_id += 1
+
+        category_names = self.class_names  # TODO: add supercategory names
+        category_names = [
+            {"id": cat_id, "name": name} for cat_id, name in enumerate(category_names)
+        ]
+
+        for idx, category_name in enumerate(category_names):
+            cats[idx] = category_name
+
+        for ann in anns.values():
+            catToImgs[ann["category_id"]].append(ann["image_id"])
+
+        # create class members
+        self.anns = anns
+        self.imgToAnns = imgToAnns
+        self.catToImgs = catToImgs
+        self.imgs = imgs
+        self.cats = cats
+        print("create index done!")
+
+    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
+        """
+        Get ann ids that satisfy given filter conditions. default skips that filter
+        :param imgIds  (int array)     : get anns for given imgs
+               catIds  (int array)     : get anns for given cats
+               areaRng (float array)   : get anns for given area range (e.g. [0 inf])
+               iscrowd (boolean)       : get anns for given crowd label (False or True)
+        :return: ids (int array)       : integer array of ann ids
+        """
+        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(imgIds) == len(catIds) == len(areaRng) == 0:
+            anns = list(self.anns.values())
+        else:
+            if not len(imgIds) == 0:
+                lists = [
+                    self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns
+                ]
+                anns = list(itertools.chain.from_iterable(lists))
+            else:
+                anns = list(self.anns.values())
+            anns = (
+                anns
+                if len(catIds) == 0
+                else [ann for ann in anns if ann["category_id"] in catIds]
+            )
+            anns = (
+                anns
+                if len(areaRng) == 0
+                else [
+                    ann
+                    for ann in anns
+                    if ann["area"] > areaRng[0] and ann["area"] < areaRng[1]
+                ]
+            )
+        if iscrowd is not None:
+            # `anns` is a plain list of annotation dicts at this point
+            ids = [ann["id"] for ann in anns if ann["iscrowd"] == iscrowd]
+        else:
+            ids = [ann["id"] for ann in anns]
+        return ids
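Since `_COCO` keeps the stock pycocotools query surface, the usual id-filtering calls work unchanged against the Deep Lake-backed index. A small sketch; the `coco` object and the category name are placeholders:

```python
# Hypothetical: `coco` is a _COCO/DeeplakeCOCO built from a Deep Lake dataset.
ann_ids = coco.getAnnIds(imgIds=[0])               # all annotations on image 0
crowd_ids = coco.getAnnIds(imgIds=[0], iscrowd=1)  # only crowd regions
cat_ids = coco.getCatIds(catNms=["car"])           # category ids by name
anns = coco.loadAnns(ann_ids)                      # resolve ids to annotation dicts
```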
+
+    def getCatIds(self, catNms: List = [], supNms: List = [], catIds: List = []):
+        """Filtering parameters.
+
+        Args:
+            catNms (List): get cats for given cat names
+            supNms (List): get classes for given supercategory names
+            catIds (List): get cats for given cat ids
+
+        Returns:
+            ids (List[int]): integer array of cat ids
+        """
+        catNms = catNms if _isArrayLike(catNms) else [catNms]
+        supNms = supNms if _isArrayLike(supNms) else [supNms]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(catNms) == len(supNms) == len(catIds) == 0:
+            cats = list(self.cats.values())
+        else:
+            cats = list(self.cats.values())
+            cats = (
+                cats
+                if len(catNms) == 0
+                else [cat for cat in cats if cat["name"] in catNms]
+            )
+            cats = (
+                cats
+                if len(supNms) == 0
+                else [cat for cat in cats if cat["supercategory"] in supNms]
+            )
+            cats = (
+                cats
+                if len(catIds) == 0
+                else [cat for cat in cats if cat["id"] in catIds]
+            )
+        ids = [cat["id"] for cat in cats]
+        return ids
+
+    def loadRes(self, resFile):
+        """
+        Load result file and return a result api object.
+        :param   resFile (str)     : file name of result file
+        :return: res (obj)         : result api object
+        """
+        res = _COCO()
+        res.dataset = {}
+        res.dataset["images"] = [img for img in list(self.imgs.values())]
+
+        print("Loading and preparing results...")
+        tic = time.time()
+        if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode):
+            with open(resFile) as f:
+                anns = json.load(f)
+        elif type(resFile) == np.ndarray:
+            anns = self.loadNumpyAnnotations(resFile)
+        else:
+            anns = resFile
+        assert type(anns) == list, "results is not an array of objects"
+        annsImgIds = [ann["image_id"] for ann in anns]
+        assert set(annsImgIds) == (
+            set(annsImgIds) & set(self.getImgIds())
+        ), "Results do not correspond to current coco set"
+        if "caption" in anns[0]:
+            imgIds = set([img["id"] for img in res.dataset["images"]]) & set(
+                [ann["image_id"] for ann in anns]
+            )
+            res.dataset["images"] = [
+                img for img in res.dataset["images"] if img["id"] in imgIds
+            ]
+            for id, ann in enumerate(anns):
+                ann["id"] = id + 1
+        elif "bbox" in anns[0] and not anns[0]["bbox"] == []:
+            res.dataset["categories"] = copy.deepcopy(list(self.cats.values()))
+            for id, ann in enumerate(anns):
+                bb = ann["bbox"]
+                x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+                if not "segmentation" in ann:
+                    ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+                ann["area"] = bb[2] * bb[3]
+                ann["id"] = id + 1
+                ann["iscrowd"] = 0
+        elif "segmentation" in anns[0]:
+            res.dataset["categories"] = copy.deepcopy(list(self.cats.values()))
+            for id, ann in enumerate(anns):
+                # now only support compressed RLE format as segmentation results
+                ann["area"] = maskUtils.area(ann["segmentation"])
+                if not "bbox" in ann:
+                    ann["bbox"] = maskUtils.toBbox(ann["segmentation"])
+                ann["id"] = id + 1
+                ann["iscrowd"] = 0
+        elif "keypoints" in anns[0]:
+            res.dataset["categories"] = copy.deepcopy(list(self.cats.values()))
+            for id, ann in enumerate(anns):
+                s = ann["keypoints"]
+                x = s[0::3]
+                y = s[1::3]
+                x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+                ann["area"] = (x1 - x0) * (y1 - y0)
+                ann["id"] = id + 1
+                ann["bbox"] = [x0, y0, x1 - x0, y1 - y0]
+        print("DONE (t={:0.2f}s)".format(time.time() - tic))
+
+        res.dataset["annotations"] = anns
+        res.createIndex()
+        return res
+
+
+class DeeplakeCOCO(_COCO):
+    """This class is almost the same as the official pycocotools package.
+
+    It implements some snake-case function aliases so that the COCO class has
+    the same interface as the LVIS class.
+    """
+
+    def __init__(
+        self,
+        deeplake_dataset=None,
+        imgs=None,
+        masks=None,
+        masks_type_kind=None,
+        bboxes=None,
+        labels=None,
+        iscrowds=None,
+        class_names=None,
+        bbox_format=("LTRB", "pixel"),
+    ):
+        if getattr(pycocotools, "__version__", "0") >= "12.0.2":
+            warnings.warn(
+                'mmpycocotools is deprecated. Please install official pycocotools by "pip install pycocotools"',  # noqa: E501
+                UserWarning,
+            )
+        super().__init__(
+            deeplake_dataset=deeplake_dataset,
+            imgs=imgs,
+            masks=masks,
+            masks_type_kind=masks_type_kind,
+            labels=labels,
+            bboxes=bboxes,
+            iscrowds=iscrowds,
+            class_names=class_names,
+            bbox_format=bbox_format,
+        )
+        self.img_ann_map = self.imgToAnns
+        self.cat_img_map = self.catToImgs
+
+    def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None):
+        return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd)
+
+    def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]):
+        return self.getCatIds(cat_names, sup_names, cat_ids)
+
+    def get_img_ids(self, img_ids=[], cat_ids=[]):
+        return self.getImgIds(img_ids, cat_ids)
+
+    def load_anns(self, ids):
+        return self.loadAnns(ids)
+
+    def load_cats(self, ids):
+        return self.loadCats(ids)
+
+    def load_imgs(self, ids):
+        return self.loadImgs(ids)
+
+
+class COCODatasetEvaluater(mmdet_coco.CocoDataset):
+    def __init__(
+        self,
+        pipeline,
+        deeplake_dataset=None,
+        classes=None,
+        img_prefix="",
+        seg_prefix=None,
+        seg_suffix=".png",
+        proposal_file=None,
+        test_mode=True,
+        filter_empty_gt=True,
+        file_client_args=dict(backend="disk"),
+        imgs=None,
+        masks=None,
+        masks_type_kind=None,
+        bboxes=None,
+        labels=None,
+        iscrowds=None,
+        bbox_format=None,
+        batch_size=1,
+        num_gpus=1,
+    ):
+        self.img_prefix = img_prefix
+        self.seg_prefix = seg_prefix
+        self.seg_suffix = seg_suffix
+        self.proposal_file = proposal_file
+        self.test_mode = test_mode
+        self.filter_empty_gt = filter_empty_gt
+        self.file_client = mmcv.FileClient(**file_client_args)
+        self.CLASSES = classes
+        self.batch_size = batch_size
+        self.num_gpus = num_gpus
+        self.masks_type_kind = masks_type_kind
+
+        self.data_infos = self.load_annotations(
+            deeplake_dataset,
+            imgs=imgs,
+            labels=labels,
+            masks=masks,
+            masks_type_kind=self.masks_type_kind,
+            bboxes=bboxes,
+            iscrowds=iscrowds,
+            class_names=self.CLASSES,
+            bbox_format=bbox_format,
+        )
+        self.proposals = None
+
+        # filter images too small and containing no annotations
+        if not test_mode:
+            valid_inds = self._filter_imgs()
+            self.data_infos = [self.data_infos[i] for i in valid_inds]
+            if self.proposals is not None:
+                self.proposals = [self.proposals[i] for i in valid_inds]
+            # set group flag for the sampler
+            self._set_group_flag()
+
+        # processing pipeline
+
+    def pipeline(self, x):
+        return x
+
+    def __len__(self):
+        length = super().__len__()
+        per_gpu_length = math.floor(length / (self.batch_size * self.num_gpus))
+        total_length = per_gpu_length * self.num_gpus
+        return total_length
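A quick worked example of the length truncation in `__len__` above, with illustrative numbers; the effect, presumably, is that every rank runs the same whole number of full batches:

```python
import math

# Illustrative numbers only: 103 samples, batch_size=4, num_gpus=2.
length, batch_size, num_gpus = 103, 4, 2
per_gpu_length = math.floor(length / (batch_size * num_gpus))  # 12
total_length = per_gpu_length * num_gpus                       # 24
```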
+
+    def load_annotations(
+        self,
+        deeplake_dataset,
+        imgs=None,
+        labels=None,
+        masks=None,
+        masks_type_kind=None,
+        bboxes=None,
+        iscrowds=None,
+        class_names=None,
+        bbox_format=None,
+    ):
+        """Load COCO-style annotations directly from a Deep Lake dataset.
+
+        Args:
+            deeplake_dataset (dp.Dataset): Deeplake dataset object.
+            imgs (dp.Tensor): image deeplake tensor.
+            labels (List[numpy]): List of labels for every detection for each image in numpy format.
+            masks (List[numpy]): List of masks for every detection for each image in numpy format.
+            masks_type_kind: type kind of the masks tensor.
+            bboxes (List[numpy]): List of bboxes for every detection for each image in numpy format.
+            iscrowds (List[numpy]): List of iscrowds for every detection for each image in numpy format.
+            class_names (List[str]): List of class names for every detection for each image.
+            bbox_format (Tuple[str, str]): Tuple containing bbox format information, e.g. ("LTRB", "pixel").
+
+        Returns:
+            list[dict]: Annotation info from COCO api.
+        """
+
+        self.coco = DeeplakeCOCO(
+            deeplake_dataset,
+            imgs=imgs,
+            labels=labels,
+            bboxes=bboxes,
+            masks=masks,
+            masks_type_kind=masks_type_kind,
+            iscrowds=iscrowds,
+            class_names=class_names,
+            bbox_format=bbox_format,
+        )
+        # The order of returned `cat_ids` will not
+        # change with the order of the CLASSES
+        self.cat_ids = self.coco.get_cat_ids(cat_names=self.CLASSES)
+
+        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
+        self.img_ids = self.coco.get_img_ids()
+        data_infos = []
+        total_ann_ids = []
+        for i in self.img_ids:
+            info = self.coco.load_imgs([i])[0]
+            data_infos.append(info)
+            ann_ids = self.coco.get_ann_ids(img_ids=[i])
+            total_ann_ids.extend(ann_ids)
+        assert len(set(total_ann_ids)) == len(total_ann_ids)
+        return data_infos
+
+
+def convert_poly_to_coco_format(masks):
+    if isinstance(masks, np.ndarray):
+        px = masks[..., 0]
+        py = masks[..., 1]
+        poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
+        poly = [[float(p) for x in poly for p in x]]
+        return poly
+    poly = []
+    for mask in masks:
+        poly_i = convert_poly_to_coco_format(mask)
+        poly.append([np.array(poly_i[0])])
+    return poly
diff --git a/python/deeplake/integrations/mmdet/test_.py b/python/deeplake/integrations/mmdet/test_.py
new file mode 100644
index 0000000000..c574bbc253
--- /dev/null
+++ b/python/deeplake/integrations/mmdet/test_.py
@@ -0,0 +1,225 @@
+# Copyright (c) OpenMMLab. All rights reserved.
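Before the test loop below, a short sketch of `convert_poly_to_coco_format` from mmdet_utils_.py above; note the two recursion modes (a single `(N, 2)` vertex array vs. a list of them) and the half-pixel center offset. The inputs are illustrative:

```python
import numpy as np

triangle = np.array([[0, 0], [10, 0], [0, 10]])  # (N, 2) polygon vertices
print(convert_poly_to_coco_format(triangle))
# [[0.5, 0.5, 10.5, 0.5, 0.5, 10.5]] -- flattened x,y pairs with +0.5 offset

# A list input recurses once per instance, wrapping each result as an array.
many = convert_poly_to_coco_format([triangle, triangle])
```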
+import os.path as osp +import pickle +import shutil +import tempfile +import time + +import mmcv +import torch +import torch.distributed as dist +from mmcv.image import tensor2imgs +from mmcv.runner import get_dist_info + +from mmdet.core import encode_mask_results +from mmdet.utils import get_device + + +def single_gpu_test( + model, + data_loader, + show=False, + out_dir=None, + show_score_thr=0.3, + show_box_only=False, + show_mask_only=False, +): + model.eval() + results = [] + dataset = data_loader.dataset.mmdet_dataset + PALETTE = getattr(dataset, "PALETTE", None) + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + + batch_size = len(result) + if show or out_dir: + if batch_size == 1 and isinstance(data["img"][0], torch.Tensor): + img_tensor = data["img"][0] + else: + img_tensor = data["img"][0].data[0] + img_metas = data["img_metas"][0].data[0] + imgs = tensor2imgs(img_tensor, **img_metas[0]["img_norm_cfg"]) + assert len(imgs) == len(img_metas) + + for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): + h, w, _ = img_meta["img_shape"] + img_show = img[:h, :w, :] + + ori_h, ori_w = img_meta["ori_shape"][:-1] + img_show = mmcv.imresize(img_show, (ori_w, ori_h)) + + if out_dir: + out_file = osp.join(out_dir, img_meta["ori_filename"]) + else: + out_file = None + + model.module.show_result( + img_show, + result[i], + bbox_color=PALETTE, + text_color=PALETTE, + mask_color=PALETTE, + show=show, + out_file=out_file, + score_thr=show_score_thr, + show_box_only=show_box_only, + show_mask_only=show_mask_only, + ) + + # encode mask results + if isinstance(result[0], tuple): + result = [ + (bbox_results, encode_mask_results(mask_results)) + for bbox_results, mask_results in result + ] + # This logic is only used in panoptic segmentation test. + elif isinstance(result[0], dict) and "ins_results" in result[0]: + for j in range(len(result)): + bbox_results, mask_results = result[j]["ins_results"] + result[j]["ins_results"] = ( + bbox_results, + encode_mask_results(mask_results), + ) + + results.extend(result) + + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' + it encodes results to gpu tensors and use gpu communication for results + collection. On cpu mode it saves the results on different gpus to 'tmpdir' + and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset.mmdet_dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + time.sleep(2) # This line can prevent deadlock problem in some cases. 
+ for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + # encode mask results + if isinstance(result[0], tuple): + result = [ + (bbox_results, encode_mask_results(mask_results)) + for bbox_results, mask_results in result + ] + # This logic is only used in panoptic segmentation test. + elif isinstance(result[0], dict) and "ins_results" in result[0]: + for j in range(len(result)): + bbox_results, mask_results = result[j]["ins_results"] + result[j]["ins_results"] = ( + bbox_results, + encode_mask_results(mask_results), + ) + + results.extend(result) + + if rank == 0: + batch_size = len(result) + for _ in range(batch_size * world_size): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + rank, world_size = get_dist_info() + default_device = get_device() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full( + (MAX_LEN,), 32, dtype=torch.uint8, device=default_device + ) + if rank == 0: + mmcv.mkdir_or_exist(".dist_test") + tmpdir = tempfile.mkdtemp(dir=".dist_test") + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device=default_device + ) + dir_tensor[: len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl")) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, f"part_{i}.pkl") + part_list.append(mmcv.load(part_file)) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + rank, world_size = get_dist_info() + default_device = get_device() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device=default_device + ) + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device=default_device) + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device=default_device) + part_send[: shape_tensor[0]] = part_tensor + part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_list.append(pickle.loads(recv[: shape[0]].cpu().numpy().tobytes())) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/python/deeplake/integrations/mmseg/__init__.py 
 b/python/deeplake/integrations/mmseg/__init__.py
new file mode 100644
index 0000000000..29d7586f84
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/__init__.py
@@ -0,0 +1,2 @@
+from deeplake.integrations.mmseg.mmseg_ import train_segmentor
+from mmseg.models import build_segmentor  # type: ignore
diff --git a/python/deeplake/integrations/mmseg/compose_transform_.py b/python/deeplake/integrations/mmseg/compose_transform_.py
new file mode 100644
index 0000000000..0dabdc657b
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/compose_transform_.py
@@ -0,0 +1,78 @@
+import io
+import numpy as np
+from typing import Callable, Optional, List
+from functools import partial
+
+from PIL import Image  # type: ignore
+
+from deeplake.integrations.mm.exceptions import InvalidImageError, InvalidSegmentError
+from deeplake.integrations.mm.upcast_array import upcast_array
+from mmcv.utils import build_from_cfg
+from mmseg.datasets.builder import PIPELINES  # type: ignore
+from mmseg.datasets.pipelines import Compose  # type: ignore
+
+
+def build_pipeline(steps):
+    return Compose(
+        [
+            build_from_cfg(step, PIPELINES, None)
+            for step in steps
+            if step["type"] not in {"LoadImageFromFile", "LoadAnnotations"}
+        ]
+    )
+
+
+def transform(
+    sample_in,
+    images_tensor: str,
+    masks_tensor: str,
+    pipeline: Callable,
+):
+    try:
+        img = upcast_array(sample_in[images_tensor])
+    except Exception as e:
+        raise InvalidImageError(images_tensor, e)
+    if isinstance(img, (bytes, bytearray)):
+        img = np.array(Image.open(io.BytesIO(img)))
+    elif not isinstance(img, np.ndarray):
+        img = np.array(img)
+
+    try:
+        mask = sample_in[masks_tensor]
+    except Exception as e:
+        raise InvalidSegmentError(masks_tensor, e)
+    if not isinstance(mask, np.ndarray):
+        mask = np.array(mask)
+
+    if img.ndim == 2:
+        img = np.expand_dims(img, -1)
+
+    img = img[..., ::-1]  # rgb_to_bgr should be optional
+    if img.shape[2] == 1:
+        img = np.repeat(img, 3, axis=2)
+    shape = img.shape
+
+    pipeline_dict = {
+        "img": np.ascontiguousarray(img, dtype=np.float32),
+        "img_fields": ["img"],
+        "filename": None,
+        "ori_filename": None,
+        "img_shape": shape,
+        "ori_shape": shape,
+        "gt_semantic_seg": np.ascontiguousarray(mask, np.int64),
+        "seg_fields": ["gt_semantic_seg"],
+    }
+
+    return pipeline(pipeline_dict)
+
+
+def compose_transform(
+    images_tensor: str,
+    masks_tensor: Optional[str],
+    pipeline: List,
+):
+    pipeline = build_pipeline(pipeline)
+    return partial(
+        transform,
+        images_tensor=images_tensor,
+        masks_tensor=masks_tensor,
+        pipeline=pipeline,
+    )
diff --git a/python/deeplake/integrations/mmseg/mmseg_.py b/python/deeplake/integrations/mmseg/mmseg_.py
new file mode 100644
index 0000000000..be709fdbe7
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/mmseg_.py
@@ -0,0 +1,739 @@
+"""
+Deep Lake offers an integration with MMSegmentation, a popular open-source semantic segmentation toolbox based on PyTorch.
+The integration enables users to train models while streaming Deep Lake datasets using the transformation, training, and evaluation tools built by MMSeg.
+
+Learn more about MMSegmentation `here `_.
+
+Integration Interface
+~~~~~~~~~~~~~~~~~~~~~
+MMSegmentation works with configs. Deeplake adopted this strategy, and in order to train MMSeg models, you need to create/specify your model
+and training/validation config. Deep Lake integration's logic is almost the same as MMSegmentation's with some minor modifications. The integration
+with MMSeg occurs in the deeplake.integrations.mmseg module.
+At a high level, Deep Lake is responsible for the pytorch dataloader that streams data
+to the training framework, while MMSeg is used for the training, transformation, and evaluation logic. Let us take a look at the config with deeplake changes:
+
+Deeplake integration requires the following parameters to be specified in the configuration file:
+
+- ``data``: Just like in the MMSegmentation configuration files, in the data dictionary you can specify everything that you want to be applied to the data during training and validation
+    - ``train``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the training data
+    - ``val``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the validation data
+    - ``pipeline``: List of transformations. This parameter exists for train as well as for val.
+
+        - Example:
+
+            >>> pipeline = [dict(type="Resize", img_scale=[(320, 320), (608, 608)], keep_ratio=True), dict(type="RandomFlip", flip_ratio=0.5), dict(type="PhotoMetricDistortion")]
+
+    - ``deeplake_path``: Path to the deeplake dataset. This parameter exists for train as well as for val.
+    - ``deeplake_credentials``: Optional parameter. Required only when using private nonlocal datasets. See the documentation for `deeplake.open_read_only() <https://docs.deeplake.ai/latest/api/dataset/#deeplake.open_read_only>`_ for details. This parameter exists for train as well as for val.
+    - ``deeplake_tag_id``: Optional parameter. If specified, the dataset checks out to the given tag/commit. See the documentation for ``Dataset.commit_id``. This parameter exists for train as well as for val.
+    - ``deeplake_query``: Optional parameter. If specified, the dataset is loaded from the query result when ``deeplake_path`` is not given; if ``deeplake_path`` is given, the query is applied to that dataset.
+    - ``deeplake_tensors``: Optional parameter. If specified, maps MMSegmentation tensors to the associated tensors in the dataset. MMSeg tensors are: "img", "gt_semantic_seg". This parameter exists for train as well as for val.
+        - ``"img"``: Stands for image tensor.
+        - ``"gt_semantic_seg"``: Stands for semantic segmentation tensor.
+
+    - ``deeplake_dataloader``: Optional parameter. If specified, represents the parameters of the deeplake dataloader. Deeplake dataloader parameters are: "shuffle", "batch_size", "num_workers". This parameter exists for train as well as for val.
+        - ``"shuffle"``: If ``True`` shuffles the dataset.
+        - ``"batch_size"``: Size of batch. If not specified, dataloader will use ``samples_per_gpu``.
+        - ``"num_workers"``: Number of workers to use. If not specified, dataloader will use ``workers_per_gpu``.
+
+- ``evaluation``: Dictionary that configures validation, e.g. which metrics to compute and how often. Example:
+
+>>> evaluation = dict(metric=["mIoU"], interval=1)
+
+- ``train_segmentor``: Function to train the MMSegmentation model.
+
+    Parameters:
+
+    - ``model``: MMSegmentation model that is going to be used.
+    - ``cfg``: mmcv.ConfigDict, Configuration of the model as well as of the datasets and transforms that are going to be used.
+    - ``ds_train``: Optional parameter. If provided, overwrites deeplake_path in train and passes this dataset directly to the dataloader.
+    - ``ds_val``: Optional parameter. If provided, overwrites deeplake_path in val and passes this dataset directly to the dataloader.
+    - ``ds_train_tensors``: Optional parameter. If provided, overwrites deeplake_tensors in train and passes this tensor mapping directly to the dataloader.
+    - ``ds_val_tensors``: Optional parameter. If provided, overwrites deeplake_tensors in val and passes this tensor mapping directly to the dataloader.
+    - ``distributed``: Optional parameter. If provided, training runs on all available GPUs.
+    - ``meta``: Optional parameter. Meta data used to build the runner.
+    - ``timestamp``: Variable used in runner to make .log and .log.json filenames the same.
+    - ``validate``: Bool, whether validation should be run, defaults to ``True``.
+
+
+MMSegmentation Config Examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Below is an example of a deeplake mmseg configuration:
+
+
+>>> _base_ = "../mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k.py"
+>>> # use caffe img_norm
+>>> img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)
+>>> train_pipeline = [
+...     dict(type='LoadImageFromFile'),
+...     dict(type='LoadAnnotations'),
+...     dict(
+...         type='Expand',
+...         mean=img_norm_cfg['mean'],
+...         to_rgb=img_norm_cfg['to_rgb'],
+...         ratio_range=(1, 2)),
+...     dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True),
+...     dict(type='RandomFlip', flip_ratio=0.0),
+...     dict(type='PhotoMetricDistortion'),
+...     dict(type='Normalize', **img_norm_cfg),
+...     dict(type='Pad', size_divisor=32),
+...     dict(type='DefaultFormatBundle'),
+...     dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+... ]
+>>> test_pipeline = [
+...     dict(type='LoadImageFromFile'),
+...     dict(
+...         type='MultiScaleFlipAug',
+...         img_scale=(416, 416),
+...         flip=False,
+...         transforms=[
+...             dict(type='Resize', keep_ratio=True),
+...             dict(type='RandomFlip', flip_ratio=0.0),
+...             dict(type='Normalize', **img_norm_cfg),
+...             dict(type='Pad', size_divisor=32),
+...             dict(type='ImageToTensor', keys=['img']),
+...             dict(type='Collect', keys=['img'])
+...         ])
+... ]
+>>> #--------------------------------------DEEPLAKE INPUTS------------------------------------------------------------#
+>>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN"
+>>> data = dict(
+...     # samples_per_gpu=4, # Is used instead of batch_size if deeplake_dataloader is not specified below
+...     # workers_per_gpu=8, # Is used instead of num_workers if deeplake_dataloader is not specified below
+...     train=dict(
+...         pipeline=train_pipeline,
+...         # Credentials for authentication. See documentation for deeplake.open_read_only() for details
+...         deeplake_path="hub://activeloop/semantic-seg-train",
+...         deeplake_credentials={
+...             "token": TOKEN,
+...             "creds": None,
+...         },
+...         #OPTIONAL - Checkout the specified commit_id before training
+...         deeplake_commit_id="",
+...         #OPTIONAL - Loads a dataset tag for training based on tag_id
+...         deeplake_tag_id="",
+...         # OPTIONAL - {"mmseg_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMSeg dictionary keys.
+...         # If not specified, Deep Lake will auto-infer the mapping, but it might make mistakes if datasets have many tensors
+...         deeplake_tensors = {"img": "images", "gt_semantic_seg": "semantic_seg"},
+...         # OPTIONAL - Parameters to use for the Deep Lake dataloader. If unspecified, the integration uses
+...         # the parameters in other parts of the cfg file such as samples_per_gpu, and others.
+...         deeplake_dataloader = {"shuffle": True, "batch_size": 4, 'num_workers': 8}
+...     ),
+...     # Parameters are the same as for train
+...     val=dict(
+...         pipeline=test_pipeline,
+...         deeplake_path="hub://activeloop/semantic-seg-val",
+...         deeplake_credentials={
+...             "token": TOKEN,
+...             "creds": None,
+...         },
+...         deeplake_tensors = {"img": "images", "gt_semantic_seg": "semantic_seg"},
+...         deeplake_dataloader = {"shuffle": False, "batch_size": 1, 'num_workers': 8}
+...     ),
+... )
+>>> # Which metrics to use for evaluation. In MMSeg (without Deeplake), this is inferred from the dataset type.
+>>> # In the Deep Lake integration, since the format is standardized, a variety of metrics can be used for a given dataset.
+>>> #----------------------------------END DEEPLAKE INPUTS------------------------------------------------------------#
+
+And the training script:
+
+>>> import os
+>>> from mmcv import Config
+>>> import mmcv
+>>> from deeplake.integrations import mmseg as mmseg_deeplake
+>>> cfg = Config.fromfile(cfg_file)
+>>> # Build the segmentor
+>>> model = mmseg_deeplake.build_segmentor(cfg.model)
+>>> # Create work_dir
+>>> mmcv.mkdir_or_exist(os.path.abspath(cfg.work_dir))
+>>> # Run the training
+>>> mmseg_deeplake.train_segmentor(model, cfg, distributed=args.distributed, validate=args.validate)
+"""
+
+import warnings
+import torch
+import numpy as np
+import io
+import math
+import types
+from functools import partial
+
+from typing import Callable, Optional, List, Dict, Sequence, Union
+from PIL import Image  # type: ignore
+
+from mmseg.core import DistEvalHook, EvalHook  # type: ignore
+from mmseg.core import build_optimizer
+from mmseg.utils import (  # type: ignore
+    build_dp,
+    find_latest_checkpoint,
+    get_root_logger,
+)
+
+from mmseg.datasets.samplers import DistributedSampler  # type: ignore
+from mmseg.utils.util_distribution import *  # type: ignore
+from deeplake.integrations.mm.get_indexes import get_indexes
+from deeplake.integrations.mm.worker_init_fn import worker_init_fn
+from deeplake.integrations.mm.ipc import _get_free_port
+from deeplake.integrations.mm.exceptions import ValidationDatasetMissingError
+
+from mmcv.utils import build_from_cfg, digit_version  # type: ignore
+from mmcv.parallel import collate  # type: ignore
+import mmcv  # type: ignore
+from mmcv.runner import init_dist  # type: ignore
+from mmcv.runner import (  # type: ignore
+    DistSamplerSeedHook,
+    EpochBasedRunner,
+    OptimizerHook,
+    build_runner,
+    get_dist_info,
+    HOOKS,
+)
+
+
+import deeplake as dp
+from deeplake.types import TypeKind
+from deeplake.integrations.mm.warnings import always_warn
+
+from deeplake.integrations.mm.mm_runners import DeeplakeIterBasedRunner
+from deeplake.integrations.mm.mm_common import (
+    load_ds_from_cfg,
+    get_collect_keys,
+    check_persistent_workers,
+    find_image_tensor,
+    find_smask_tensor,
+    ddp_setup,
+    force_cudnn_initialization,
+    check_unsupported_functionalities,
+    get_pipeline,
+)
+from deeplake.integrations.mmseg.mmseg_dataset_ import MMSegDataset, MMSegTorchDataset
+from deeplake.integrations.mmseg.compose_transform_ import compose_transform
+
+from torch.utils.data import DataLoader, IterableDataset
+
+
+# Monkey-patch the function
+from deeplake.integrations.mmseg.test_ import single_gpu_test as custom_single_gpu_test
+from deeplake.integrations.mmseg.test_ import multi_gpu_test as custom_multi_gpu_test
+
+import mmseg.apis
+
+mmseg.apis.single_gpu_test = custom_single_gpu_test
+mmseg.apis.multi_gpu_test = custom_multi_gpu_test
+
+
+def build_ddp(model, device, *args, **kwargs):
+    """Build DistributedDataParallel module by device type.
+
+    If device is cuda, return a MMDistributedDataParallel model;
+    if device is mlu, return a MLUDistributedDataParallel model.
+
+    Args:
+        model (:class:`nn.Module`): module to be parallelized.
+        device (str): device type, mlu or cuda.
+        args (List): arguments to be passed to ddp_factory
+        kwargs (dict): keyword arguments to be passed to ddp_factory
+
+    Returns:
+        :class:`nn.Module`: the module to be parallelized
+
+    References:
+        .. [1] https://pytorch.org/docs/stable/generated/torch.nn.parallel.
+                     DistributedDataParallel.html
+    """
+
+    assert device in ["cuda", "mlu"], "Only available for cuda or mlu devices."
+    if device == "cuda":
+        model = model.cuda(kwargs["device_ids"][0])  # patch
+    elif device == "mlu":
+        from mmcv.device.mlu import MLUDistributedDataParallel  # type: ignore
+
+        ddp_factory["mlu"] = MLUDistributedDataParallel
+        model = model.mlu()
+
+    return ddp_factory[device](model, *args, **kwargs)
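A hedged sketch of how this wrapper gets invoked; it mirrors the call made later in `_train_segmentor`, with a placeholder device id and an already-built model:

```python
# Minimal sketch, assuming a built mmseg model and one visible CUDA device.
model = build_ddp(
    model,
    "cuda",
    device_ids=[0],  # placeholder; _train_segmentor passes cfg.gpu_ids[local_rank]
    broadcast_buffers=False,
    find_unused_parameters=False,
)
```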
+
+
+def mmseg_subiterable_dataset_eval(
+    self,
+    *args,
+    **kwargs,
+):
+    return self.dataset.mmseg_dataset.evaluate(*args, **kwargs)
+
+
+def train_segmentor(
+    model,
+    cfg: mmcv.ConfigDict,
+    ds_train=None,
+    ds_train_tensors=None,
+    ds_val: Optional[dp.Dataset] = None,
+    ds_val_tensors=None,
+    distributed: bool = False,
+    timestamp=None,
+    meta=None,
+    validate: bool = True,
+):
+    """
+    Creates the runner, then trains and optionally evaluates the model.
+
+    Args:
+        model: model to train, should be built before passing
+        cfg: mmcv.ConfigDict object containing all necessary configuration.
+            In cfg we have several changes to support deeplake integration:
+            _base_: still serves as a base model to inherit from
+            data: everything related to data processing; you will need to specify the following parameters:
+                train: everything related to training data, it has the following attributes:
+                    pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+                    deeplake_tensors: dictionary that maps mmseg keys to deeplake dataset tensors. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+                        If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+                        The keys that need to be mapped are `img` and `gt_semantic_seg`. Both are always required; if they are not specified, they
+                        are searched automatically. If you list `gt_semantic_seg` in the Collect keys, you need to either specify it in the config or it will be searched based on the
+                        `segment_mask` htype.
+                    deeplake_credentials: dictionary with deeplake credentials that allow you to access the specified data. It has the following argument: `token`.
+                        `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+                val (Optional): everything related to validation data, it has the following attributes:
+                    pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+                    deeplake_tensors: dictionary that maps mmseg keys to deeplake dataset tensors. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+                        If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+                        The keys that need to be mapped are `img` and `gt_semantic_seg`. Both are always required; if they are not specified, they
+                        are searched automatically. If you list `gt_semantic_seg` in the Collect keys, you need to either specify it in the config or it will be searched based on the
+                        `segment_mask` htype.
+                    deeplake_credentials: deeplake credentials that allow you to access the specified data. It has the following argument: `token`.
+                        `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+                test (Optional): everything related to testing data, it has the following attributes:
+                    pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+                    deeplake_tensors: dictionary that maps mmseg keys to deeplake dataset tensors. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+                        If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+                        The keys that need to be mapped are `img` and `gt_semantic_seg`. Both are always required; if they are not specified, they
+                        are searched automatically. If you list `gt_semantic_seg` in the Collect keys, you need to either specify it in the config or it will be searched based on the
+                        `segment_mask` htype.
+                    deeplake_credentials: deeplake credentials that allow you to access the specified data. It has the following argument: `token`.
+                        `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+            samples_per_gpu: number of samples to be processed per gpu
+            workers_per_gpu: number of workers per gpu
+            optimizer: dictionary containing information about optimizer initialization
+            optimizer_config: some optimizer configuration that might be used during training like grad_clip etc.
+            runner: training type e.g. EpochBasedRunner, here you can specify maximum number of epochs to be conducted. For instance: `runner = dict(type='EpochBasedRunner', max_epochs=273)`
+        ds_train: train dataset of type dp.Dataset. This can be a view of the dataset.
+        ds_train_tensors: dictionary that maps mmseg keys to deeplake dataset tensors. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+            If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+            The keys that need to be mapped are `img` and `gt_semantic_seg`. Both are always required; if they are not specified, they
+            are searched automatically. If you list `gt_semantic_seg` in the Collect keys, you need to either specify it in the config or it will be searched based on the
+            `segment_mask` htype.
+        ds_val: validation dataset of type dp.Dataset. This can be a view of the dataset.
+        ds_val_tensors: dictionary that maps mmseg keys to deeplake dataset tensors. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+            If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+            The keys that need to be mapped are `img` and `gt_semantic_seg`. Both are always required; if they are not specified, they
+            are searched automatically. If you list `gt_semantic_seg` in the Collect keys, you need to either specify it in the config or it will be searched based on the
+            `segment_mask` htype.
+        evaluation: dictionary that contains all information needed for evaluation apart from data processing, like how often evaluation should be done and what metrics we want to use.
+            For instance, `evaluation = dict(interval=1, metric=['mIoU'])`
+        distributed: bool, whether DDP training should be used; `False` by default
+        timestamp: variable used in the runner to make the .log and .log.json filenames the same
+        meta: metadata used to build the runner
+        validate: bool, whether validation should be run; `True` by default
+    """
+    check_unsupported_functionalities(cfg)
+
+    if not hasattr(cfg, "gpu_ids"):
+        cfg.gpu_ids = range(torch.cuda.device_count() if distributed else 1)
+    if distributed:
+        return torch.multiprocessing.spawn(
+            _train_segmentor,
+            args=(
+                model,
+                cfg,
+                ds_train,
+                ds_train_tensors,
+                ds_val,
+                ds_val_tensors,
+                distributed,
+                timestamp,
+                meta,
+                validate,
+                _get_free_port(),
+            ),
+            nprocs=len(cfg.gpu_ids),
+        )
+    _train_segmentor(
+        0,
+        model,
+        cfg,
+        ds_train,
+        ds_train_tensors,
+        ds_val,
+        ds_val_tensors,
+        distributed,
+        timestamp,
+        meta,
+        validate,
+    )
+
+
+def register_validation_hook_(
+    batch_size: int,
+    num_workers: int,
+    distributed: bool,
+    cfg: mmcv.ConfigDict,
+    ignore_index: int,
+    reduce_zero_label: bool,
+    train_persistent_workers: bool = False,
+    ds_val: Optional[dp.Dataset] = None,
+    ds_val_tensors=None,
+    runner=None,
+):
+    eval_cfg = cfg.get("evaluation", {})
+    val_dataloader_default_args = dict(
+        samples_per_gpu=batch_size,
+        workers_per_gpu=num_workers,
+        dist=distributed,
+        shuffle=False,
+        mode="val",
+        seed=cfg.seed,
+        num_gpus=len(cfg.gpu_ids),
+        ignore_index=ignore_index,
+        reduce_zero_label=reduce_zero_label,
+    )
+
+    val_dataloader_args = {
+        **cfg.data.val.get("deeplake_dataloader", {}),
+        **val_dataloader_default_args,
+    }
+
+    val_persistent_workers = val_dataloader_args.get("persistent_workers", False)
+    check_persistent_workers(train_persistent_workers, val_persistent_workers)
+
+    # The merge above forces shuffle=False, so check the user-provided
+    # dataloader config to see whether a requested shuffle is being dropped.
+    if cfg.data.val.get("deeplake_dataloader", {}).get("shuffle", False):
+        always_warn("shuffle argument for validation dataset will be ignored.")
+
+    if ds_val is None:
+        cfg_ds_val = cfg.data.get("val")
+        if not cfg_ds_val or not any(
+            cfg_ds_val.get(key) is not None
+            for key in ["deeplake_path", "deeplake_query"]
+        ):
+            raise ValidationDatasetMissingError()
+        ds_val = load_ds_from_cfg(cfg.data.val)
+        ds_val_tensors = cfg.data.val.get("deeplake_tensors", {})
+    else:
+        cfg_data = cfg.data.val.get("deeplake_path")
+        if cfg_data is not None:
+            always_warn(
+                "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_segmentor. The dataset input to train_segmentor will be used in the workflow."
+            )
+
+    if ds_val is None:
+        raise ValidationDatasetMissingError()
+
+    if ds_val_tensors:
+        val_images_tensor = ds_val_tensors["img"]
+        val_masks_tensor = ds_val_tensors.get("gt_semantic_seg")
+    else:
+        val_images_tensor = find_image_tensor(ds_val, mm_class="img")
+        val_masks_tensor = None
+        collection_keys = get_collect_keys(cfg)
+        if "gt_semantic_seg" in collection_keys:
+            val_masks_tensor = find_smask_tensor(ds_val, mm_class="gt_semantic_seg")
+
+    val_pipeline = get_pipeline(cfg, name="val", generic_name="test_pipeline")
+
+    val_dataloader = build_dataloader(
+        ds_val,
+        val_images_tensor,
+        val_masks_tensor,
+        pipeline=val_pipeline,
+        **val_dataloader_args,
+    )
+
+    eval_cfg["by_epoch"] = cfg.runner["type"] != "DeeplakeIterBasedRunner"
+    eval_cfg["pre_eval"] = False
+    eval_hook = EvalHook
+    if distributed:
+        eval_hook = DistEvalHook
+    # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
+    # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
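+    # Registering the eval hook at "LOW" priority keeps it running after the
+    # default "NORMAL"-priority hooks, such as IterTimerHook, on each interval.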
+    runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority="LOW")
+
+
+def _train_segmentor(
+    local_rank,
+    model,
+    cfg: mmcv.ConfigDict,
+    ds_train=None,
+    ds_train_tensors=None,
+    ds_val: Optional[dp.Dataset] = None,
+    ds_val_tensors=None,
+    distributed: bool = False,
+    timestamp=None,
+    meta=None,
+    validate: bool = True,
+    port=None,
+):
+    batch_size = cfg.data.get("samples_per_gpu", 256)
+    num_workers = cfg.data.get("workers_per_gpu", 1)
+
+    ignore_index = cfg.get("ignore_index", 255)
+    reduce_zero_label = cfg.get("reduce_zero_label", False)
+
+    if ds_train is None:
+        ds_train = load_ds_from_cfg(cfg.data.train)
+        ds_train_tensors = cfg.data.train.get("deeplake_tensors", {})
+    else:
+        cfg_data = cfg.data.train.get("deeplake_path")
+        if cfg_data:
+            always_warn(
+                "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_segmentor. The dataset input to train_segmentor will be used in the workflow."
+            )
+
+    if ds_train_tensors:
+        train_images_tensor = ds_train_tensors["img"]
+        train_masks_tensor = ds_train_tensors.get("gt_semantic_seg")
+    else:
+        train_images_tensor = find_image_tensor(ds_train, mm_class="img")
+        train_masks_tensor = None
+
+        collection_keys = get_collect_keys(cfg)
+        if "gt_semantic_seg" in collection_keys:
+            train_masks_tensor = find_smask_tensor(ds_train, mm_class="gt_semantic_seg")
+
+    model.CLASSES = ds_train[train_masks_tensor].metadata["class_names"]
+
+    logger = get_root_logger(log_level=cfg.log_level)
+    runner_type = "EpochBasedRunner" if "runner" not in cfg else cfg.runner["type"]
+
+    train_dataloader_default_args = dict(
+        samples_per_gpu=batch_size,
+        workers_per_gpu=num_workers,
+        # `num_gpus` will be ignored if distributed
+        num_gpus=len(cfg.gpu_ids),
+        dist=distributed,
+        seed=cfg.seed,
+        runner_type=runner_type,
+        ignore_index=ignore_index,
+        reduce_zero_label=reduce_zero_label,
+    )
+
+    train_loader_cfg = {
+        **train_dataloader_default_args,
+        **cfg.data.get("train_dataloader", {}),
+        **cfg.data.train.get("deeplake_dataloader", {}),
+    }
+
+    # put model on gpus
+    if distributed:
+        # Sets the `find_unused_parameters` parameter of the underlying
+        # torch.nn.parallel.DistributedDataParallel wrapper built by build_ddp.
+        find_unused_parameters = cfg.get("find_unused_parameters", False)
+        force_cudnn_initialization(cfg.gpu_ids[local_rank])
+        ddp_setup(local_rank, len(cfg.gpu_ids), port)
+        model = build_ddp(
+            model,
+            cfg.device,
+            device_ids=[cfg.gpu_ids[local_rank]],
+            broadcast_buffers=False,
+            find_unused_parameters=find_unused_parameters,
+        )
+    else:
+        model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)
+
+    train_pipeline = get_pipeline(cfg, name="train", generic_name="train_pipeline")
+
+    data_loader = build_dataloader(
+        ds_train,
+        train_images_tensor,
+        train_masks_tensor,
+        pipeline=train_pipeline,
+        **train_loader_cfg,
+    )
+
+    # build optimizer
+    optimizer = build_optimizer(model, cfg.optimizer)
+
+    # check runner
+    cfg.custom_imports = dict(
+        imports=["deeplake.integrations.mm.mm_runners"],
+        allow_failed_imports=False,
+    )
+    if cfg.runner.type == "IterBasedRunner":
+        cfg.runner.type = "DeeplakeIterBasedRunner"
+    elif cfg.runner.type == "EpochBasedRunner":
+        cfg.runner.type = "DeeplakeEpochBasedRunner"
+
+    runner = build_runner(
+        cfg.runner,
+        default_args=dict(
+            model=model,
+            optimizer=optimizer,
+            work_dir=cfg.work_dir,
+            logger=logger,
+            meta=meta,
+            force_cleanup=False,
+        ),
+    )
+
+    # a workaround to make the .log and .log.json filenames the same
+    runner.timestamp = timestamp
+
+    if distributed and "type" not in cfg.optimizer_config:
+        optimizer_config = OptimizerHook(**cfg.optimizer_config)
+    else:
+        optimizer_config = cfg.optimizer_config
+
+    # register hooks
+    runner.register_training_hooks(
+        cfg.lr_config,
+        optimizer_config,
+        cfg.checkpoint_config,
+        cfg.log_config,
+        cfg.get("momentum_config", None),
+    )
+
+    if distributed and isinstance(runner, EpochBasedRunner):
+        runner.register_hook(DistSamplerSeedHook())
+
+    # register eval hooks
+    if validate:
+        register_validation_hook_(
+            batch_size=batch_size,
+            num_workers=num_workers,
+            distributed=distributed,
+            train_persistent_workers=train_loader_cfg.get("persistent_workers", False),
+            cfg=cfg,
+            ignore_index=ignore_index,
+            reduce_zero_label=reduce_zero_label,
+            ds_val=ds_val,
+            ds_val_tensors=ds_val_tensors,
+            runner=runner,
+        )
+
+    # user-defined hooks
+    if cfg.get("custom_hooks", None):
+        custom_hooks = cfg.custom_hooks
+        assert isinstance(
+            custom_hooks, list
+        ), f"custom_hooks expect list type, but got {type(custom_hooks)}"
+        for hook_cfg in cfg.custom_hooks:
+            assert isinstance(hook_cfg, dict), (
+                "Each item in custom_hooks expects dict type, but got "
+                f"{type(hook_cfg)}"
+            )
+            hook_cfg = hook_cfg.copy()
+            priority = hook_cfg.pop("priority", "NORMAL")
+            hook = build_from_cfg(hook_cfg, HOOKS)
+            runner.register_hook(hook, priority=priority)
+
+    resume_from = None
+    if cfg.resume_from is None and cfg.get("auto_resume"):
+        resume_from = find_latest_checkpoint(cfg.work_dir)
+    if resume_from is not None:
+        cfg.resume_from = resume_from
+
+    if cfg.resume_from:
+        runner.resume(cfg.resume_from)
+    elif cfg.load_from:
+        runner.load_checkpoint(cfg.load_from)
+    runner.run([data_loader], cfg.workflow)
+
+
+def build_dataloader(
+    dataset: dp.Dataset,
+    images_tensor: str,
+    masks_tensor: Optional[str],
+    pipeline: List,
+    mode: str = "train",
+    **loader_config,
+):
+    persistent_workers = loader_config.get("persistent_workers", False)
+    # Forward ignore_index / reduce_zero_label to MMSegDataset so the values
+    # configured in the cfg are actually used during evaluation.
+    ignore_index = loader_config.get("ignore_index", 255)
+    reduce_zero_label = loader_config.get("reduce_zero_label", False)
+    dist = loader_config["dist"]
+    seed = loader_config["seed"]
+    transform_fn = compose_transform(
+        images_tensor=images_tensor, masks_tensor=masks_tensor, pipeline=pipeline
+    )
+
+    num_workers = loader_config.get("num_workers")
+    pin_memory = loader_config.get("pin_memory", False)
+    if num_workers is None:
+        num_workers = loader_config["workers_per_gpu"]
+
+    shuffle = loader_config.get("shuffle", True)
+
+    tensors_dict = {
+        "images_tensor": images_tensor,
+    }
+    tensors = [images_tensor]
+    if masks_tensor is not None:
+        tensors.append(masks_tensor)
+        tensors_dict["masks_tensor"] = masks_tensor
+
+    batch_size = loader_config.get("batch_size")
+    drop_last = loader_config.get("drop_last", False)
+    if batch_size is None:
+        batch_size = loader_config["samples_per_gpu"]
+
+    collate_fn = partial(collate, samples_per_gpu=batch_size)
+
+    mmseg_ds = MMSegDataset(
+        dataset=dataset,
+        transform=transform_fn,
+        tensors_dict=tensors_dict,
+        tensors=tensors,
+        mode=mode,
+        num_gpus=loader_config["num_gpus"],
+        batch_size=batch_size,
+        ignore_index=ignore_index,
+        reduce_zero_label=reduce_zero_label,
+    )
+
+    # `rank` is also needed by worker_init_fn below, so fetch it even when not
+    # running distributed (get_dist_info falls back to rank 0, world_size 1).
+    rank, world_size = get_dist_info()
+    if dist:
+        sl = get_indexes(
+            dataset, rank=rank, num_replicas=world_size, drop_last=drop_last
+        )
+        dataset = dataset.query(
+            f"select * LIMIT {sl.stop - sl.start} OFFSET {sl.start}"
+        )
+
+    pytorch_ds = MMSegTorchDataset(dataset, transform=transform_fn)
+    pytorch_ds.mmseg_dataset = mmseg_ds
+
+    init_fn = (
+        partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
+        if seed is not None
+        else None
+    )
+
+    if digit_version(torch.__version__) >= digit_version("1.8.0"):
+        loader = DataLoader(
+            pytorch_ds,
+            batch_size=batch_size,
+            sampler=None,
+            num_workers=num_workers,
+            collate_fn=collate_fn,
+            pin_memory=pin_memory,
+            shuffle=shuffle,
+            worker_init_fn=init_fn,
+            drop_last=drop_last,
+            persistent_workers=persistent_workers,
+        )
+    else:
+        loader = DataLoader(
+            pytorch_ds,
+            batch_size=batch_size,
+            sampler=None,
+            num_workers=num_workers,
+            collate_fn=collate_fn,
+            pin_memory=pin_memory,
+            shuffle=shuffle,
+            worker_init_fn=init_fn,
+            drop_last=drop_last,
+        )
+
+    eval_fn = partial(mmseg_subiterable_dataset_eval, loader)
+    loader.dataset.evaluate = eval_fn
+
+    return loader
diff --git a/python/deeplake/integrations/mmseg/mmseg_dataset_.py b/python/deeplake/integrations/mmseg/mmseg_dataset_.py
new file mode 100644
index 0000000000..494f730b57
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/mmseg_dataset_.py
@@ -0,0 +1,239 @@
+from collections import OrderedDict
+import math
+import warnings
+
+import numpy as np
+
+from typing import Optional, Callable, Sequence
+from torch.utils.data import Dataset
+from prettytable import PrettyTable  # type: ignore
+
+import mmcv
+from mmcv.utils import print_log
+from mmseg.core import eval_metrics, intersect_and_union, pre_eval_to_metrics
+
+from deeplake.integrations.mm.exceptions import InvalidImageError, InvalidSegmentError
+from deeplake.integrations.mm.upcast_array import upcast_array
+
+
+class MMSegTorchDataset(Dataset):
+    def __init__(
+        self,
+        dataset,
+        tensors=None,
+        transform: Optional[Callable] = None,
+    ) -> None:
+        super().__init__()
+        self.dataset = dataset
+        self.transform = transform
+        self.column_names = [col.name for col in self.dataset.schema.columns]
+        self.last_successful_index = -1
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, idx):
+        while True:
+            try:
+                sample = self.dataset[idx]
+                result = None
+                if self.transform:
+                    result = self.transform(sample)
+                else:
+                    out = {}
+                    for col in self.column_names:
+                        out[col] = sample[col]
+                    result = out
+                self.last_successful_index = idx
+                return result
+            except (InvalidImageError, InvalidSegmentError) as e:
+                print(f"Error processing data at index {idx}: {e}")
+                if self.last_successful_index == -1:
+                    self.last_successful_index = idx + 1
+                idx = self.last_successful_index
+                continue
+
+
+class MMSegDataset(MMSegTorchDataset):
+    def __init__(
+        self,
+        *args,
+        tensors_dict,
+        mode="train",
+        num_gpus=1,
+        batch_size=1,
+        ignore_index=255,
+        reduce_zero_label=False,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.mode = mode
+        self.num_gpus = num_gpus
+        self.batch_size = batch_size
+        self.ignore_index = ignore_index
+        self.reduce_zero_label = reduce_zero_label
+        # "masks_tensor" is absent when no mask tensor was found, so use .get
+        self.masks_tensor_name = tensors_dict.get("masks_tensor")
+        if self.mode in ("val", "test"):
+            self.CLASSES = self.get_classes(self.masks_tensor_name)[:]
+
+    def __len__(self):
+        if self.mode == "val":
+            per_gpu_length = math.floor(
+                len(self.dataset) / (self.batch_size * self.num_gpus)
+            )
+            total_length = per_gpu_length * self.num_gpus
+            return total_length
+        return super().__len__()
+
+    def _get_masks(self, masks_tensor):
+        if masks_tensor is None:
+            return []
+        return self.dataset[masks_tensor]
+
+    def get_classes(self, classes):
+        """Get class names of current dataset.
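+
+        The names are read from the ``class_names`` metadata of the given
+        Deep Lake tensor, so no predefined class list is required.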
+
+        Args:
+            classes (str): Represents the name of the classes tensor. Overrides the CLASSES defined by the dataset.
+
+        Returns:
+            list[str]: Names of categories of the dataset.
+        """
+        return self.dataset[classes].metadata["class_names"]
+
+    def get_gt_seg_maps(self, efficient_test=None):
+        """Get ground truth segmentation maps for evaluation."""
+        if efficient_test is not None:
+            warnings.warn(
+                "DeprecationWarning: ``efficient_test`` has been deprecated "
+                "since MMSeg v0.16, the ``get_gt_seg_maps()`` is CPU memory "
+                "friendly by default. "
+            )
+
+        mask_col = self._get_masks(self.masks_tensor_name)
+        last_successful_index = -1
+        for idx in range(len(self)):
+            try:
+                result = upcast_array(mask_col[idx])
+                last_successful_index = idx
+                yield result
+            except Exception as e:
+                print(f"Error processing mask at index {idx}: {e}")
+                if last_successful_index == -1:
+                    continue
+                else:
+                    yield upcast_array(mask_col[last_successful_index])
+
+    def evaluate(self, results, metric="mIoU", logger=None, gt_seg_maps=None, **kwargs):
+        """Evaluate the dataset.
+
+        Args:
+            results (list[tuple[torch.Tensor]] | list[str]): per image pre_eval
+                results or predicted segmentation maps used to compute the
+                evaluation metric.
+            metric (str | list[str]): Metrics to be evaluated. 'mIoU',
+                'mDice' and 'mFscore' are supported.
+            logger (logging.Logger | None | str): Logger used for printing
+                related information during evaluation. Default: None.
+            gt_seg_maps (generator[ndarray]): Custom gt seg maps as input,
+                used in ConcatDataset.
+
+        ..
+            # noqa: DAR101
+
+        Raises:
+            KeyError: if a specified metric format is not supported
+
+        Returns:
+            dict[str, float]: Default metrics.
+        """
+
+        if self.num_gpus > 1:
+            results_ordered = []
+            for i in range(self.num_gpus):
+                results_ordered += results[i :: self.num_gpus]
+            results = results_ordered
+
+        if isinstance(metric, str):
+            metric = [metric]
+        allowed_metrics = ["mIoU", "mDice", "mFscore"]
+        if not set(metric).issubset(set(allowed_metrics)):
+            raise KeyError("metric {} is not supported".format(metric))
+
+        eval_results = {}
+        # test a list of files
+        if mmcv.is_list_of(results, np.ndarray) or mmcv.is_list_of(results, str):
+            if gt_seg_maps is None:
+                gt_seg_maps = self.get_gt_seg_maps()
+            num_classes = len(self.CLASSES)
+            ret_metrics = eval_metrics(
+                results,
+                gt_seg_maps,
+                num_classes,
+                self.ignore_index,
+                metric,
+                label_map=dict(),
+                reduce_zero_label=self.reduce_zero_label,
+            )
+        # test a list of pre_eval_results
+        else:
+            ret_metrics = pre_eval_to_metrics(results, metric)
+
+        # Because dataset.CLASSES is required for per-class evaluation.
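+        # Fall back to numeric class ids when the dataset provides no names.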
+        if self.CLASSES is None:
+            class_names = tuple(range(num_classes))
+        else:
+            class_names = self.CLASSES
+
+        # summary table
+        ret_metrics_summary = OrderedDict(
+            {
+                ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2)
+                for ret_metric, ret_metric_value in ret_metrics.items()
+            }
+        )
+
+        # each class table
+        ret_metrics.pop("aAcc", None)
+        ret_metrics_class = OrderedDict(
+            {
+                ret_metric: np.round(ret_metric_value * 100, 2)
+                for ret_metric, ret_metric_value in ret_metrics.items()
+            }
+        )
+        ret_metrics_class.update({"Class": class_names})
+        ret_metrics_class.move_to_end("Class", last=False)
+
+        # for logger
+        class_table_data = PrettyTable()
+        for key, val in ret_metrics_class.items():
+            class_table_data.add_column(key, val)
+
+        summary_table_data = PrettyTable()
+        for key, val in ret_metrics_summary.items():
+            if key == "aAcc":
+                summary_table_data.add_column(key, [val])
+            else:
+                summary_table_data.add_column("m" + key, [val])
+
+        print_log("per class results:", logger)
+        print_log("\n" + class_table_data.get_string(), logger=logger)
+        print_log("Summary:", logger)
+        print_log("\n" + summary_table_data.get_string(), logger=logger)
+
+        # each metric dict
+        for key, value in ret_metrics_summary.items():
+            if key == "aAcc":
+                eval_results[key] = value / 100.0
+            else:
+                eval_results["m" + key] = value / 100.0
+
+        ret_metrics_class.pop("Class", None)
+        for key, value in ret_metrics_class.items():
+            eval_results.update(
+                {
+                    key + "." + str(name): value[idx] / 100.0
+                    for idx, name in enumerate(class_names)
+                }
+            )
+
+        return eval_results
diff --git a/python/deeplake/integrations/mmseg/test_.py b/python/deeplake/integrations/mmseg/test_.py
new file mode 100644
index 0000000000..fb6e1cc526
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/test_.py
@@ -0,0 +1,245 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.engine import collect_results_cpu, collect_results_gpu
+from mmcv.image import tensor2imgs
+from mmcv.runner import get_dist_info
+
+
+def np2tmp(array, temp_file_name=None, tmpdir=None):
+    """Save ndarray to local numpy file.
+
+    Args:
+        array (ndarray): Ndarray to save.
+        temp_file_name (str): Numpy file name. If 'temp_file_name=None', this
+            function will generate a file name with tempfile.NamedTemporaryFile
+            to save ndarray. Default: None.
+        tmpdir (str): Temporary directory to save Ndarray files. Default: None.
+    Returns:
+        str: The numpy file name.
+    """
+
+    if temp_file_name is None:
+        temp_file_name = tempfile.NamedTemporaryFile(
+            suffix=".npy", delete=False, dir=tmpdir
+        ).name
+    np.save(temp_file_name, array)
+    return temp_file_name
+
+
+def single_gpu_test(
+    model,
+    data_loader,
+    show=False,
+    out_dir=None,
+    efficient_test=False,
+    opacity=0.5,
+    pre_eval=False,
+    format_only=False,
+    format_args={},
+):
+    """Test with single GPU by progressive mode.
+
+    Args:
+        model (nn.Module): Model to be tested.
+        data_loader (utils.data.Dataloader): Pytorch data loader.
+        show (bool): Whether to show results during inference. Default: False.
+        out_dir (str, optional): If specified, the output results are dumped
+            into this directory.
+        efficient_test (bool): Whether to save the results as local numpy
+            files to save CPU memory during evaluation. Mutually exclusive
+            with pre_eval and format_results. Default: False.
+        opacity (float): Opacity of the painted segmentation map.
+            Default: 0.5. Must be in the (0, 1] range.
+        pre_eval (bool): Use dataset.pre_eval() function to generate
+            pre_results for metric evaluation. Mutually exclusive with
+            efficient_test and format_results. Default: False.
+        format_only (bool): Only format the results for submission without
+            performing evaluation. Mutually exclusive with pre_eval and
+            efficient_test. Default: False.
+        format_args (dict): The args for format_results. Default: {}.
+    Returns:
+        list: list of evaluation pre-results or list of save file names.
+    """
+    if efficient_test:
+        warnings.warn(
+            "DeprecationWarning: ``efficient_test`` will be deprecated, the "
+            "evaluation is CPU memory friendly with pre_eval=True"
+        )
+        mmcv.mkdir_or_exist(".efficient_test")
+    # when none of them is set true, return segmentation results as
+    # a list of np.array.
+    assert [efficient_test, pre_eval, format_only].count(True) <= 1, (
+        "``efficient_test``, ``pre_eval`` and ``format_only`` are mutually "
+        "exclusive, only one of them can be true."
+    )
+
+    model.eval()
+    results = []
+    dataset = data_loader.dataset.mmseg_dataset
+    prog_bar = mmcv.ProgressBar(len(dataset))
+    # How the data_loader retrieves samples from the dataset:
+    # sampler -> batch_sampler -> indices
+    # The indices are passed to dataset_fetcher to get data from dataset.
+    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
+    # we use batch_sampler to get the correct sample indices
+    loader_indices = data_loader.batch_sampler
+
+    for batch_indices, data in zip(loader_indices, data_loader):
+        with torch.no_grad():
+            result = model(return_loss=False, **data)
+
+        if show or out_dir:
+            img_tensor = data["img"][0]
+            img_metas = data["img_metas"][0].data[0]
+            imgs = tensor2imgs(img_tensor, **img_metas[0]["img_norm_cfg"])
+            assert len(imgs) == len(img_metas)
+
+            for img, img_meta in zip(imgs, img_metas):
+                h, w, _ = img_meta["img_shape"]
+                img_show = img[:h, :w, :]
+
+                ori_h, ori_w = img_meta["ori_shape"][:-1]
+                img_show = mmcv.imresize(img_show, (ori_w, ori_h))
+
+                if out_dir:
+                    out_file = osp.join(out_dir, img_meta["ori_filename"])
+                else:
+                    out_file = None
+
+                model.module.show_result(
+                    img_show,
+                    result,
+                    palette=dataset.PALETTE,
+                    show=show,
+                    out_file=out_file,
+                    opacity=opacity,
+                )
+
+        if efficient_test:
+            result = [np2tmp(_, tmpdir=".efficient_test") for _ in result]
+
+        if format_only:
+            result = dataset.format_results(
+                result, indices=batch_indices, **format_args
+            )
+        if pre_eval:
+            # TODO: adapt samples_per_gpu > 1.
+            # only samples_per_gpu=1 valid now
+            result = dataset.pre_eval(result, indices=batch_indices)
+            results.extend(result)
+        else:
+            results.extend(result)
+
+        batch_size = len(result)
+        for _ in range(batch_size):
+            prog_bar.update()
+
+    return results
+
+
+def multi_gpu_test(
+    model,
+    data_loader,
+    tmpdir=None,
+    gpu_collect=False,
+    efficient_test=False,
+    pre_eval=False,
+    format_only=False,
+    format_args={},
+):
+    """Test model with multiple gpus by progressive mode.
+
+    This method tests the model with multiple gpus and collects the results
+    under two different modes: gpu and cpu. By setting 'gpu_collect=True'
+    it encodes results to gpu tensors and uses gpu communication for results
+    collection. In cpu mode it saves the results on different gpus to 'tmpdir'
+    and the rank 0 worker collects them.
+
+    Args:
+        model (nn.Module): Model to be tested.
+        data_loader (utils.data.Dataloader): Pytorch data loader.
+        tmpdir (str): Path of directory to save the temporary results from
+            different gpus under cpu mode. The same path is used for efficient
+            test. Default: None.
+        gpu_collect (bool): Option to use either gpu or cpu to collect results.
+            Default: False.
+        efficient_test (bool): Whether to save the results as local numpy
+            files to save CPU memory during evaluation. Mutually exclusive
+            with pre_eval and format_results. Default: False.
+        pre_eval (bool): Use dataset.pre_eval() function to generate
+            pre_results for metric evaluation. Mutually exclusive with
+            efficient_test and format_results. Default: False.
+        format_only (bool): Only format the results for submission without
+            performing evaluation. Mutually exclusive with pre_eval and
+            efficient_test. Default: False.
+        format_args (dict): The args for format_results. Default: {}.
+
+    Returns:
+        list: list of evaluation pre-results or list of save file names.
+    """
+    if efficient_test:
+        warnings.warn(
+            "DeprecationWarning: ``efficient_test`` will be deprecated, the "
+            "evaluation is CPU memory friendly with pre_eval=True"
+        )
+        mmcv.mkdir_or_exist(".efficient_test")
+    # when none of them is set true, return segmentation results as
+    # a list of np.array.
+    assert [efficient_test, pre_eval, format_only].count(True) <= 1, (
+        "``efficient_test``, ``pre_eval`` and ``format_only`` are mutually "
+        "exclusive, only one of them can be true."
+    )
+
+    model.eval()
+    results = []
+    dataset = data_loader.dataset.mmseg_dataset
+    # How the data_loader retrieves samples from the dataset:
+    # sampler -> batch_sampler -> indices
+    # The indices are passed to dataset_fetcher to get data from dataset.
+    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
+    # we use batch_sampler to get the correct sample indices
+
+    # batch_sampler based on DistributedSampler, the indices only point to data
+    # samples of the related machine.
+    loader_indices = data_loader.batch_sampler
+
+    rank, world_size = get_dist_info()
+    if rank == 0:
+        prog_bar = mmcv.ProgressBar(len(dataset))
+
+    for batch_indices, data in zip(loader_indices, data_loader):
+        with torch.no_grad():
+            result = model(return_loss=False, rescale=True, **data)
+
+        if efficient_test:
+            result = [np2tmp(_, tmpdir=".efficient_test") for _ in result]
+
+        if format_only:
+            result = dataset.format_results(
+                result, indices=batch_indices, **format_args
+            )
+        if pre_eval:
+            # TODO: adapt samples_per_gpu > 1.
+            # only samples_per_gpu=1 valid now
+            result = dataset.pre_eval(result, indices=batch_indices)
+
+        results.extend(result)
+
+        if rank == 0:
+            batch_size = len(result) * world_size
+            for _ in range(batch_size):
+                prog_bar.update()
+
+    # collect results from all ranks
+    if gpu_collect:
+        results = collect_results_gpu(results, len(dataset))
+    else:
+        results = collect_results_cpu(results, len(dataset), tmpdir)
+    return results
diff --git a/python/deeplake/schemas.pyi b/python/deeplake/schemas.pyi
index 0e37e0c2c6..a18c5fc3ec 100644
--- a/python/deeplake/schemas.pyi
+++ b/python/deeplake/schemas.pyi
@@ -12,30 +12,39 @@ def TextEmbeddings(embedding_size: int, quantize: bool = False) -> SchemaTemplat
     """
     A schema for storing embedded text from documents.
 
-    - id (uint64)
-    - chunk_index (uint16) Position of the text_chunk within the document
-    - document_id (uint64) Unique identifier for the document the embedding came from
-    - date_created (uint64) Timestamp the document was read
-    - text_chunk (text) The text of the shard
-    - embedding (dtype=float32, size=embedding_size) The embedding of the text
+    This schema includes the following fields:
+    - id (uint64): Unique identifier for each entry.
+    - chunk_index (uint16): Position of the text chunk within the document.
+ - document_id (uint64): Unique identifier for the document the embedding came from. + - date_created (uint64): Timestamp when the document was read. + - text_chunk (text): The text of the shard. + - embedding (dtype=float32, size=embedding_size): The embedding of the text. Parameters: - embedding_size: Size of the embeddings - quantize: If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed + embedding_size: int + Size of the embeddings. + quantize: bool, optional + If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed. Default is False. Examples: + Create a dataset with the standard schema: ```python - # Create a dataset with the standard schema - ds = deeplake.create("ds_path", - schema=deeplake.schemas.TextEmbeddings(768).build()) - - # Customize the schema before creating the dataset - ds = deeplake.create("ds_path", schema=deeplake.schemas.TextEmbeddings(768) - .rename("embedding", "text_embed") - .add("author", types.Text()) - .build()) + ds = deeplake.create("tmp://", schema=deeplake.schemas.TextEmbeddings(768)) ``` + Customize the schema before creating the dataset: + ```python + ds = deeplake.create("tmp://", schema=deeplake.schemas.TextEmbeddings(768) + .rename("embedding", "text_embed") + .add("author", types.Text())) + ``` + + Add a new field to the schema: + ```python + schema = deeplake.schemas.TextEmbeddings(768) + schema.add("language", types.Text()) + ds = deeplake.create("tmp://", schema=schema) + ``` """ ... @@ -49,59 +58,101 @@ def COCOImages( """ A schema for storing COCO-based image data. - - id (uint64) - - image (jpg image) - - url (text) - - year (uint8) - - version (text) - - description (text) - - contributor (text) - - date_created (uint64) - - date_captured (uint64) - - embedding (embedding) - - license (text) - - is_crowd (bool) + This schema includes the following fields: + - id (uint64): Unique identifier for each entry. + - image (jpg image): The image data. + - url (text): URL of the image. + - year (uint8): Year the image was captured. + - version (text): Version of the dataset. + - description (text): Description of the image. + - contributor (text): Contributor of the image. + - date_created (uint64): Timestamp when the image was created. + - date_captured (uint64): Timestamp when the image was captured. + - embedding (embedding): Embedding of the image. + - license (text): License information. + - is_crowd (bool): Whether the image contains a crowd. If `objects` is true, the following fields are added: - - objects_bbox (bounding box) - - objects_classes (segment mask) + - objects_bbox (bounding box): Bounding boxes for objects. + - objects_classes (segment mask): Segment masks for objects. If `keypoints` is true, the following fields are added: - - keypoints_bbox (bounding box) - - keypoints_classes (segment mask) - - keypoints (2-dimensional array of uint32) - - keypoints_skeleton (2-dimensional array of uint16) + - keypoints_bbox (bounding box): Bounding boxes for keypoints. + - keypoints_classes (segment mask): Segment masks for keypoints. + - keypoints (2-dimensional array of uint32): Keypoints data. + - keypoints_skeleton (2-dimensional array of uint16): Skeleton data for keypoints. - if `stuffs` is true, the following fields are added: - - stuffs_bbox (bounding boxes) - - stuffs_classes (segment mask) + If `stuffs` is true, the following fields are added: + - stuffs_bbox (bounding boxes): Bounding boxes for stuffs. 
+    - stuffs_classes (segment mask): Segment masks for stuffs.
 
     Parameters:
-        embedding_size: Size of the embeddings
-        quantize: If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed
+        embedding_size: int
+            Size of the embeddings.
+        quantize: bool, optional
+            If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed. Default is False.
+        objects: bool, optional
+            Whether to include object-related fields. Default is True.
+        keypoints: bool, optional
+            Whether to include keypoint-related fields. Default is False.
+        stuffs: bool, optional
+            Whether to include stuff-related fields. Default is False.
 
     Examples:
+        Create a dataset with the standard schema:
         ```python
-        # Create a dataset with the standard schema
-        ds = deeplake.create("ds_path",
-            schema=deeplake.schemas.COCOImages(768).build())
+        ds = deeplake.create("tmp://", schema=deeplake.schemas.COCOImages(768))
+        ```
 
-        # Customize the schema before creating the dataset
-        ds = deeplake.create("ds_path", schema=deeplake.schemas.COCOImages(768,
-            objects=True, keypoints=True)
+        Customize the schema before creating the dataset:
+        ```python
+        ds = deeplake.create("tmp://", schema=deeplake.schemas.COCOImages(768, objects=True, keypoints=True)
             .rename("embedding", "image_embed")
-            .add("author", types.Text()).build())
+            .add("author", types.Text()))
         ```
 
+        Add a new field to the schema:
+        ```python
+        schema = deeplake.schemas.COCOImages(768)
+        schema.add("location", types.Text())
+        ds = deeplake.create("tmp://", schema=schema)
+        ```
     """
     ...
 
 class SchemaTemplate:
     """
-    A template that can be used for creating a new dataset with [deeplake.create][]
+    A template that can be used for creating a new dataset with [deeplake.create][].
+
+    This class allows you to define and customize the schema for your dataset.
+
+    Parameters:
+        schema: dict
+            A dictionary where the key is the column name and the value is the data type.
+
+    Methods:
+        add(name: str, dtype: deeplake._deeplake.types.DataType | str | deeplake._deeplake.types.Type) -> SchemaTemplate:
+            Adds a new column to the template.
+        remove(name: str) -> SchemaTemplate:
+            Removes a column from the template.
+        rename(old_name: str, new_name: str) -> SchemaTemplate:
+            Renames a column in the template.
+
+    Examples:
+        Create a new schema template, modify it, and create a dataset with the schema:
+        ```python
+        schema = deeplake.schemas.SchemaTemplate({
+            "id": types.UInt64(),
+            "text": types.Text(),
+            "embedding": types.Embedding(768)
+        })
+        schema.add("author", types.Text())
+        schema.remove("text")
+        schema.rename("embedding", "text_embedding")
+        ds = deeplake.create("tmp://", schema=schema)
+        ```
     """
 
-    # Temporary workaround. Need to remove `deeplake._deeplake` from the return type.
     def __init__(
         self,
         schema: dict[
@@ -109,9 +160,9 @@ class SchemaTemplate:
         ],
     ) -> None:
         """
-        Constructs a new SchemaTemplate from the given dict
+        Constructs a new SchemaTemplate from the given dict.
         """
         ...
 
     def add(
         self,
         name: str,
         dtype: deeplake._deeplake.types.DataType | str | deeplake._deeplake.types.Type,
     ) -> SchemaTemplate:
         """
-        Adds a new column to the template
+        Adds a new column to the template.
 
         Parameters:
-            name: The column name
-            dtype: The column data type
+            name: str
+                The column name.
+            dtype: deeplake._deeplake.types.DataType | str | deeplake._deeplake.types.Type
+                The column data type.
+
+        Returns:
+            SchemaTemplate: The updated schema template.
+ + Examples: + Add a new column to the schema: + ```python + schema = deeplake.schemas.SchemaTemplate({}) + schema.add("author", types.Text()) + ``` """ ... def remove(self, name: str) -> SchemaTemplate: """ - Removes a column from the template + Removes a column from the template. Parameters: - name: The column name + name: str + The column name. + + Returns: + SchemaTemplate: The updated schema template. + + Examples: + Remove a column from the schema: + ```python + schema = deeplake.schemas.SchemaTemplate({ + "id": types.UInt64(), + "text": types.Text(), + "embedding": types.Embedding(768) + }) + schema.remove("text") + ``` """ ... @@ -141,7 +219,23 @@ class SchemaTemplate: Renames a column in the template. Parameters: - old_name: Existing column name - new_name: New column name + old_name: str + Existing column name. + new_name: str + New column name. + + Returns: + SchemaTemplate: The updated schema template. + + Examples: + Rename a column in the schema: + ```python + schema = deeplake.schemas.SchemaTemplate({ + "id": types.UInt64(), + "text": types.Text(), + "embedding": types.Embedding(768) + }) + schema.rename("embedding", "text_embedding") + ``` """ ... diff --git a/python/deeplake/tql.pyi b/python/deeplake/tql.pyi index 97f0d1e43c..fdb09e88c1 100644 --- a/python/deeplake/tql.pyi +++ b/python/deeplake/tql.pyi @@ -16,6 +16,15 @@ def register_function(function: typing.Callable) -> None: TQL interacts with Python functions through `numpy.ndarray`. The Python function to be used in TQL should accept input arguments as numpy arrays and return numpy array. + + Examples: ```python def next_number(a): diff --git a/python/deeplake/types.pyi b/python/deeplake/types.pyi index 896b198fc9..a0603be9d8 100644 --- a/python/deeplake/types.pyi +++ b/python/deeplake/types.pyi @@ -306,13 +306,15 @@ def Array(dtype: DataType | str, dimensions: int, shape: list[int]) -> DataType: DataType: A new array data type with the specified parameters. Examples: - Create a three-dimensional array, where each dimension can have any number of elements:: - - ds.add_column("col1", types.Array("int32", dimensions=3)) + Create a three-dimensional array, where each dimension can have any number of elements: + ```python + ds.add_column("col1", types.Array("int32", dimensions=3)) + ``` - Create a three-dimensional array, where each dimension has a known size:: - - ds.add_column("col2", types.Array(types.Float32(), shape=[50, 30, 768])) + Create a three-dimensional array, where each dimension has a known size: + ```python + ds.add_column("col2", types.Array(types.Float32(), shape=[50, 30, 768])) + ``` """ ... @@ -324,10 +326,11 @@ def Bool() -> DataType: DataType: A new boolean data type. Examples: - Create columns with boolean type:: - - ds.add_column("col1", types.Bool) - ds.add_column("col2", "bool") + Create columns with boolean type: + ```python + ds.add_column("col1", types.Bool) + ds.add_column("col2", "bool") + ``` """ ... @@ -349,13 +352,14 @@ def Text(index_type: str | TextIndexType | None = None) -> Type: Type: A new text data type. 
Examples: - Create text columns with different configurations:: - - ds.add_column("col1", types.Text) - ds.add_column("col2", "text") - ds.add_column("col3", str) - ds.add_column("col4", types.Text(index_type=types.Inverted)) - ds.add_column("col4", types.Text(index_type=types.BM25)) + Create text columns with different configurations: + ```python + ds.add_column("col1", types.Text) + ds.add_column("col2", "text") + ds.add_column("col3", str) + ds.add_column("col4", types.Text(index_type=types.Inverted)) + ds.add_column("col5", types.Text(index_type=types.BM25)) + ``` """ ... @@ -387,11 +391,12 @@ def Dict() -> Type: :func:`deeplake.types.Struct` for a type that supports defining allowed keys. Examples: - Create and use a dictionary column:: - - ds.add_column("col1", types.Dict) - ds.append([{"col1": {"a": 1, "b": 2}}]) - ds.append([{"col1": {"b": 3, "c": 4}}]) + Create and use a dictionary column: + ```python + ds.add_column("col1", types.Dict) + ds.append([{"col1": {"a": 1, "b": 2}}]) + ds.append([{"col1": {"b": 3, "c": 4}}]) + ``` """ ... @@ -419,10 +424,11 @@ def Embedding( :func:`deeplake.types.Array` for a multidimensional array. Examples: - Create embedding columns:: - - ds.add_column("col1", types.Embedding(768)) - ds.add_column("col2", types.Embedding(768, quantization=types.QuantizationType.Binary)) + Create embedding columns: + ```python + ds.add_column("col1", types.Embedding(768)) + ds.add_column("col2", types.Embedding(768, quantization=types.QuantizationType.Binary)) + ``` """ ... @@ -434,9 +440,10 @@ def Float32() -> DataType: DataType: A new 32-bit float data type. Examples: - Create a column with 32-bit float type:: - - ds.add_column("col1", types.Float32) + Create a column with 32-bit float type: + ```python + ds.add_column("col1", types.Float32) + ``` """ ... @@ -448,9 +455,10 @@ def Float64() -> DataType: DataType: A new 64-bit float data type. Examples: - Create a column with 64-bit float type:: - - ds.add_column("col1", types.Float64) + Create a column with 64-bit float type: + ```python + ds.add_column("col1", types.Float64) + ``` """ ... @@ -462,9 +470,10 @@ def Int16() -> DataType: DataType: A new 16-bit integer data type. Examples: - Create a column with 16-bit integer type:: - - ds.add_column("col1", types.Int16) + Create a column with 16-bit integer type: + ```python + ds.add_column("col1", types.Int16) + ``` """ ... @@ -476,9 +485,10 @@ def Int32() -> DataType: DataType: A new 32-bit integer data type. Examples: - Create a column with 32-bit integer type:: - - ds.add_column("col1", types.Int32) + Create a column with 32-bit integer type: + ```python + ds.add_column("col1", types.Int32) + ``` """ ... @@ -490,9 +500,10 @@ def Int64() -> DataType: DataType: A new 64-bit integer data type. Examples: - Create a column with 64-bit integer type:: - - ds.add_column("col1", types.Int64) + Create a column with 64-bit integer type: + ```python + ds.add_column("col1", types.Int64) + ``` """ ... @@ -504,9 +515,10 @@ def Int8() -> DataType: DataType: A new 8-bit integer data type. Examples: - Create a column with 8-bit integer type:: - - ds.add_column("col1", types.Int8) + Create a column with 8-bit integer type: + ```python + ds.add_column("col1", types.Int8) + ``` """ ... @@ -526,9 +538,10 @@ def Sequence(nested_type: DataType | str | Type) -> Type: Type: A new sequence data type. 
Examples: - Create a sequence of images:: - - ds.add_column("col1", types.Sequence(types.Image(sample_ + Create a sequence of images: + ```python + ds.add_column("col1", types.Sequence(types.Image(sample_compression="jpg"))) + ``` """ def Image(dtype: DataType | str = "uint8", sample_compression: str = "png") -> Type: @@ -554,7 +567,7 @@ def Image(dtype: DataType | str = "uint8", sample_compression: str = "png") -> T Examples: ```python ds.add_column("col1", types.Image) - ds.add_column("col1", types.Image(sample_compression="jpg")) + ds.add_column("col2", types.Image(sample_compression="jpg")) ``` """ ... @@ -615,7 +628,7 @@ def BinaryMask( Examples: ```python ds.add_column("col1", types.BinaryMask(sample_compression="lz4")) - ds.append(np.zeros((512, 512, 5), dtype="bool")) + ds.append([{"col1": np.zeros((512, 512, 5), dtype="bool")}]) ``` """ ... @@ -637,7 +650,7 @@ def SegmentMask( Examples: ```python ds.add_column("col1", types.SegmentMask(sample_compression="lz4")) - ds.append("col1", np.zeros((512, 512))) + ds.append([{"col1": np.zeros((512, 512, 3))}]) ``` """ ... @@ -655,14 +668,12 @@ def Struct(fields: dict[str, DataType | str]) -> DataType: ```python ds.add_column("col1", types.Struct({ "field1": types.Int16(), - "field2": types.Text(), + "field2": "text", })) ds.append([{"col1": {"field1": 3, "field2": "a"}}]) print(ds[0]["col1"]["field1"]) # Output: 3 ``` - - """ ...