diff --git a/python/deeplake/__init__.py b/python/deeplake/__init__.py
index 5684486de2..538b73c46e 100644
--- a/python/deeplake/__init__.py
+++ b/python/deeplake/__init__.py
@@ -14,7 +14,7 @@ def progress_bar(iterable, *args, **kwargs):
import deeplake
from ._deeplake import *
-__version__ = "4.1.1"
+__version__ = "4.1.2"
__all__ = [
"__version__",
@@ -128,9 +128,7 @@ def progress_bar(iterable, *args, **kwargs):
"types",
"Client",
"client",
- "__child_atfork",
"__prepare_atfork",
- "__parent_atfork",
]
@@ -259,7 +257,7 @@ def transfer_with_links(source, dest, links, column_names):
def __register_at_fork():
- from ._deeplake import __prepare_atfork, __parent_atfork, __child_atfork
+ from ._deeplake import __prepare_atfork
UNSAFE_TYPES = (
Dataset,
@@ -300,13 +298,13 @@ def check_main_globals_for_unsafe_types():
def before_fork():
check_main_globals_for_unsafe_types()
-        __prepare_atfork()
def after_fork_parent():
- __parent_atfork()
+ pass
def after_fork_child():
- __child_atfork()
+ pass
os.register_at_fork(
before=before_fork,
@@ -314,5 +312,11 @@ def after_fork_child():
after_in_child=after_fork_child,
)
+    # Wrap os.fork so that __prepare_atfork runs immediately before each
+    # explicit fork call, rather than inside the register_at_fork hooks above.
+    original_fork = os.fork
+
+    def fork():
+        __prepare_atfork()
+        return original_fork()
+
+    os.fork = fork
__register_at_fork()
diff --git a/python/deeplake/__init__.pyi b/python/deeplake/__init__.pyi
index 7fb934332a..7ee819eb5b 100644
--- a/python/deeplake/__init__.pyi
+++ b/python/deeplake/__init__.pyi
@@ -118,304 +118,572 @@ __all__ = [
"types",
"Client",
"client",
- "__child_atfork",
"__prepare_atfork",
- "__parent_atfork",
]
class Future:
"""
- A future that represents a value that will be resolved in the future.
+ A future representing an asynchronous operation result in ML pipelines.
- Once the Future is resolved, it will hold the result, and you can retrieve it
- using either a blocking call (`result()`) or via asynchronous mechanisms (`await`).
-
- The future will resolve automatically even if you do not explicitly wait for it.
+ The Future class enables non-blocking operations for data loading and processing,
+ particularly useful when working with large ML datasets or distributed training.
+ Once resolved, the Future holds the operation result which can be accessed either
+ synchronously or asynchronously.
Methods:
result() -> typing.Any:
- Blocks until the Future is resolved and returns the object.
+ Blocks until the Future resolves and returns the result.
__await__() -> typing.Any:
- Awaits the future asynchronously and returns the object once it's ready.
+ Enables using the Future in async/await syntax.
is_completed() -> bool:
- Returns True if the Future is already resolved, False otherwise.
+ Checks if the Future has resolved without blocking.
+
+
+ Examples:
+ Loading ML dataset asynchronously:
+ ```python
+ future = deeplake.open_async("s3://ml-data/embeddings")
+
+ # Check status without blocking
+ if not future.is_completed():
+ print("Still loading...")
+
+ # Block until ready
+ ds = future.result()
+ ```
+
+ Using with async/await:
+ ```python
+ async def load_data():
+ ds = await deeplake.open_async("s3://ml-data/images")
+            batch = await ds["images"].get_async(slice(0, 32))
+ return batch
+ ```
"""
def result(self) -> typing.Any:
"""
- Blocks until the Future is resolved, then returns the result.
+ Blocks until the Future resolves and returns the result.
Returns:
- typing.Any: The result when the Future is resolved.
+ typing.Any: The operation result once resolved.
+
+
+
+ Examples:
+ ```python
+ future = ds["images"].get_async(slice(0, 32))
+ batch = future.result() # Blocks until batch is loaded
+ ```
"""
...
def __await__(self) -> typing.Any:
"""
- Awaits the resolution of the Future asynchronously.
+ Makes the Future compatible with async/await syntax.
+
+
Examples:
- ```python
- result = await future
- ```
+ ```python
+ async def load_batch():
+ batch = await ds["images"].get_async(slice(0, 32))
+ ```
Returns:
- typing.Any: The result when the Future is resolved.
+ typing.Any: The operation result once resolved.
"""
...
def is_completed(self) -> bool:
"""
- Checks if the Future has been resolved.
+ Checks if the Future has resolved without blocking.
Returns:
- bool: True if the Future is resolved, False otherwise.
+ bool: True if resolved, False if still pending.
+
+
+
+ Examples:
+ ```python
+ future = ds.query_async("SELECT * WHERE label = 'car'")
+ if future.is_completed():
+ results = future.result()
+ else:
+ print("Query still running...")
+ ```
"""
...
class FutureVoid:
"""
- A future that represents the completion of an operation that returns no result.
-
- The future will resolve automatically to `None`, even if you do not explicitly wait for it.
+ A Future representing a void async operation in ML pipelines.
+
+ Similar to Future but for operations that don't return values, like saving
+ or committing changes. Useful for non-blocking data management operations.
Methods:
wait() -> None:
- Blocks until the FutureVoid is resolved and then returns `None`.
+ Blocks until operation completes.
__await__() -> None:
- Awaits the FutureVoid asynchronously and returns `None` once the operation is complete.
+ Enables using with async/await syntax.
is_completed() -> bool:
- Returns True if the FutureVoid is already resolved, False otherwise.
+ Checks completion status without blocking.
+
+
+
+ Examples:
+ Asynchronous dataset updates:
+ ```python
+ # Update embeddings without blocking
+ future = ds["embeddings"].set_async(slice(0, 32), new_embeddings)
+
+ # Do other work while update happens
+ process_other_data()
+
+ # Wait for update to complete
+ future.wait()
+ ```
+
+ Using with async/await:
+ ```python
+ async def update_dataset():
+ await ds.commit_async()
+ print("Changes saved")
+ ```
"""
def wait(self) -> None:
"""
- Blocks until the FutureVoid is resolved, then returns `None`.
+ Blocks until the operation completes.
+
+
Examples:
```python
- future_void.wait() # Blocks until the operation completes.
+ future = ds.commit_async()
+ future.wait() # Blocks until commit finishes
```
-
- Returns:
- None: Indicates the operation has completed.
"""
...
def __await__(self) -> None:
"""
- Awaits the resolution of the FutureVoid asynchronously.
+ Makes the FutureVoid compatible with async/await syntax.
- Examples:
- ```python
- await future_void # Waits for the completion of the async operation.
- ```
+
- Returns:
- None: Indicates the operation has completed.
+ Examples:
+ ```python
+ async def save_changes():
+ await ds.commit_async()
+ ```
"""
...
def is_completed(self) -> bool:
"""
- Checks if the FutureVoid has been resolved.
+ Checks if the operation has completed without blocking.
Returns:
- bool: True if the FutureVoid is resolved, False otherwise.
+ bool: True if completed, False if still running.
+
+
+
+ Examples:
+ ```python
+ future = ds.commit_async()
+ if future.is_completed():
+ print("Commit finished")
+ else:
+ print("Commit still running...")
+ ```
"""
...
class ReadOnlyMetadata:
"""
- ReadOnlyMetadata is a key-value store.
+ Read-only access to dataset and column metadata for ML workflows.
+
+ Stores important information about datasets like:
+ - Model parameters and hyperparameters
+ - Preprocessing statistics (mean, std, etc.)
+ - Data splits and fold definitions
+ - Version and training information
+
+
+
+ Examples:
+ Accessing model metadata:
+ ```python
+ metadata = ds.metadata
+ model_name = metadata["model_name"]
+ model_params = metadata["hyperparameters"]
+ ```
+
+ Reading preprocessing stats:
+ ```python
+ mean = ds["images"].metadata["mean"]
+ std = ds["images"].metadata["std"]
+ ```
"""
def __getitem__(self, key: str) -> typing.Any:
"""
- Get the value for the given key
+ Gets metadata value for the given key.
+
+ Args:
+ key: Metadata key to retrieve
+
+ Returns:
+ The stored metadata value
+
+
+
+ Examples:
+ ```python
+ mean = ds["images"].metadata["mean"]
+ std = ds["images"].metadata["std"]
+ ```
"""
...
def keys(self) -> list[str]:
"""
- Return a list of all keys in the metadata
+ Lists all available metadata keys.
+
+ Returns:
+ list[str]: List of metadata key names
+
+
+
+ Examples:
+ ```python
+ # Print all metadata
+ for key in metadata.keys():
+ print(f"{key}: {metadata[key]}")
+ ```
"""
...
class Metadata(ReadOnlyMetadata):
"""
- Metadata is a key-value store.
+ Writable access to dataset and column metadata for ML workflows.
+
+ Stores important information about datasets like:
+ - Model parameters and hyperparameters
+ - Preprocessing statistics
+ - Data splits and fold definitions
+ - Version and training information
+
+ Changes are persisted immediately without requiring `commit()`.
+
+    Examples:
+        Storing model metadata:
+        ```python
+        ds.metadata["model_name"] = "resnet50"
+        ds.metadata["hyperparameters"] = {
+            "learning_rate": 0.001,
+            "batch_size": 32
+        }
+        ```
+
+        Setting preprocessing stats:
+        ```python
+        ds["images"].metadata["mean"] = [0.485, 0.456, 0.406]
+        ds["images"].metadata["std"] = [0.229, 0.224, 0.225]
+        ```
"""
def __setitem__(self, key: str, value: typing.Any) -> None:
"""
- Set the value for the given key. Setting the value will immediately persist the change without requiring a commit().
+ Sets metadata value for given key. Changes are persisted immediately.
+
+ Args:
+ key: Metadata key to set
+ value: Value to store
+
+ Examples:
+ ```python
+ ds.metadata["train_split"] = 0.8
+ ds.metadata["val_split"] = 0.1
+ ds.metadata["test_split"] = 0.1
+ ```
"""
...
def query(query: str, token: str | None = None) -> DatasetView:
"""
- Executes a TQL (Tensor Query Language) query and returns a filtered DatasetView.
+ Executes TQL queries optimized for ML data filtering and search.
- TQL provides SQL-like querying capabilities specifically designed for ML datasets, allowing you
- to filter, sort, and select data based on various criteria including vector similarity.
+ TQL is a SQL-like query language designed for ML datasets, supporting:
+ - Vector similarity search
+ - Text semantic search
+ - Complex data filtering
+ - Joining across datasets
+ - Efficient sorting and pagination
Args:
- query: A TQL query string. The query can:
- - Filter rows using WHERE clauses
- - Sort results using ORDER BY
- - Select specific columns using SELECT
- - Perform vector similarity search using BM25_SIMILARITY
- - Join multiple datasets
- token: Optional Activeloop token for authentication. Not required if using environment
- credentials.
+ query: TQL query string supporting:
+ - Vector similarity: COSINE_SIMILARITY, EUCLIDEAN_DISTANCE
+ - Text search: BM25_SIMILARITY, CONTAINS
+ - Filtering: WHERE clauses
+ - Sorting: ORDER BY
+ - Joins: JOIN across datasets
+ token: Optional Activeloop authentication token
Returns:
- DatasetView: A view containing the query results. The view can be:
- - Used directly for ML training
+ DatasetView: Query results that can be:
+ - Used directly in ML training
- Further filtered with additional queries
- Converted to PyTorch/TensorFlow dataloaders
- Materialized into a new dataset
- Examples:
- Basic filtering:
- ```python
- # Select images with high confidence labels
- view = deeplake.query(f'SELECT * FROM "{ds_path}" WHERE confidence > 0.9')
-
- # Get samples from specific classes
- cats = deeplake.query(f'SELECT * FROM "{ds_path}" WHERE label IN (\'cat\', \'kitten\')')
- ```
+
- Text similarity search:
+ Examples:
+ Vector similarity search:
```python
- # Find semantically similar text using BM25
- similar = deeplake.query(f'''
- SELECT * FROM "{ds_path}"
- ORDER BY BM25_SIMILARITY(text_column, 'query text') DESC
+ # Find similar embeddings
+ similar = deeplake.query('''
+ SELECT * FROM "mem://embeddings"
+ ORDER BY COSINE_SIMILARITY(vector, ARRAY[0.1, 0.2, 0.3]) DESC
LIMIT 100
''')
+
+ # Use results in training
+ dataloader = similar.pytorch()
```
- Vector similarity search:
+ Text semantic search:
```python
- # Find nearest neighbor embeddings
- neighbors = deeplake.query(f'''
- SELECT * FROM "{ds_path}"
- ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, ...]) DESC
+ # Search documents using BM25
+ relevant = deeplake.query('''
+ SELECT * FROM "mem://documents"
+ ORDER BY BM25_SIMILARITY(text, 'machine learning') DESC
LIMIT 10
''')
```
- Joins across datasets:
+ Complex filtering:
```python
- # Join images with their metadata
- results = deeplake.query(f'''
- SELECT i.image, m.label, m.bbox
- FROM "{image_ds_path}" AS i
- JOIN "{metadata_ds_path}" AS m ON i.id = m.image_id
- WHERE m.verified = true
+ # Filter training data
+ train = deeplake.query('''
+ SELECT * FROM "mem://dataset"
+ WHERE "split" = 'train'
+ AND confidence > 0.9
+ AND label IN ('cat', 'dog')
''')
```
- Using with ML frameworks:
+ Joins for feature engineering:
```python
- # Filter dataset and create PyTorch dataloader
- train_data = deeplake.query("SELECT * FROM dataset WHERE split = 'train'")
- train_loader = train_data.pytorch().dataloader(batch_size=32)
+ # Combine image features with metadata
+ features = deeplake.query('''
+ SELECT i.image, i.embedding, m.labels, m.metadata
+ FROM "mem://images" AS i
+ JOIN "mem://metadata" AS m ON i.id = m.image_id
+ WHERE m.verified = true
+ ''')
```
"""
...
def query_async(query: str, token: str | None = None) -> Future:
"""
- Asynchronously executes a TQL (Tensor Query Language) query and returns a Future that will resolve into DatasetView.
+ Asynchronously executes TQL queries optimized for ML data filtering and search.
- TQL provides SQL-like querying capabilities specifically designed for ML datasets, allowing you
- to filter, sort, and select data based on various criteria including vector similarity.
+ Non-blocking version of `query()` for better performance with large datasets.
+ Supports the same TQL features including vector similarity search, text search,
+ filtering, and joins.
Args:
- query: A TQL query string. The query can:
- - Filter rows using WHERE clauses
- - Sort results using ORDER BY
- - Select specific columns using SELECT
- - Perform vector similarity search using BM25_SIMILARITY
- - Join multiple datasets
- token: Optional Activeloop token for authentication. Not required if using environment
- credentials.
+ query: TQL query string supporting:
+ - Vector similarity: COSINE_SIMILARITY, EUCLIDEAN_DISTANCE
+ - Text search: BM25_SIMILARITY, CONTAINS
+ - Filtering: WHERE clauses
+ - Sorting: ORDER BY
+ - Joins: JOIN across datasets
+ token: Optional Activeloop authentication token
Returns:
- Future: A Future object that resolves to a DatasetView. The resulting view can be:
- - Used directly for ML training
+ Future: Resolves to DatasetView that can be:
+ - Used directly in ML training
- Further filtered with additional queries
- Converted to PyTorch/TensorFlow dataloaders
- Materialized into a new dataset
- Examples:
- Basic filtering with await:
- ```python
- # Select images with high confidence labels
- view = await deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE confidence > 0.9')
-
- # Get samples from specific classes
- cats = await deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE label IN (\'cat\', \'kitten\')')
- ```
+
- Text similarity search with Future.result():
+ Examples:
+ Basic async query:
```python
- # Find semantically similar text using BM25
- future = deeplake.query_async(f'''
- SELECT * FROM "{ds_path}"
- ORDER BY BM25_SIMILARITY(text_column, 'query text') DESC
- LIMIT 100
+ # Run query asynchronously
+ future = deeplake.query_async('''
+ SELECT * FROM "mem://embeddings"
+ ORDER BY COSINE_SIMILARITY(vector, ARRAY[0.1, 0.2, 0.3]) DESC
''')
- similar = future.result() # Blocks until query completes
- ```
- Vector similarity search:
- ```python
- # Find nearest neighbor embeddings
- neighbors = await deeplake.query_async(f'''
- SELECT * FROM "{ds_path}"
- ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, ...]) DESC
- LIMIT 10
- ''')
- ```
+ # Do other work while query runs
+ prepare_training()
- Joins across datasets:
- ```python
- # Join images with their metadata
- results = await deeplake.query_async(f'''
- SELECT i.image, m.label, m.bbox
- FROM "{image_ds_path}" AS i
- JOIN "{metadata_ds_path}" AS m ON i.id = m.image_id
- WHERE m.verified = true
- ''')
+ # Get results when needed
+ results = future.result()
```
- Using with ML frameworks:
+ With async/await:
```python
- # Filter dataset and create PyTorch dataloader
- future = deeplake.query_async(f'SELECT * FROM "{ds_path}" WHERE split = \'train\'')
- train_data = future.result()
- train_loader = train_data.pytorch().dataloader(batch_size=32)
+ async def search_similar():
+ results = await deeplake.query_async('''
+ SELECT * FROM "mem://images"
+ ORDER BY COSINE_SIMILARITY(embedding, ARRAY[0.1, 0.2, 0.3]) DESC
+ LIMIT 100
+ ''')
+ return results
+
+ async def main():
+ similar = await search_similar()
```
Non-blocking check:
```python
- # Check if query is complete without blocking
- future = deeplake.query_async(f'SELECT * FROM "{ds_path}"')
+ future = deeplake.query_async(
+ "SELECT * FROM dataset WHERE \\"split\\" = 'train'"
+ )
+
if future.is_completed():
- results = future.result()
+ train_data = future.result()
+ else:
+ print("Query still running...")
```
"""
...
class Client:
+ """
+ Client for connecting to Activeloop services.
+ Handles authentication and API communication.
+ """
endpoint: str
class Tag:
@@ -635,36 +903,46 @@ class ColumnView:
- Access column metadata and properties
- Get information about linked data if the column contains references
+
+
Examples:
- Load image data from a column for training
+ Load image data from a column for training:
```python
# Access a single image
- image = dataset["images"][0]
+ image = ds["images"][0]
# Load a batch of images
- batch = dataset["images"][0:32]
+ batch = ds["images"][0:32]
# Async load for better performance
- images_future = dataset["images"].get_async(0:32)
+ images_future = ds["images"].get_async(slice(0, 32))
images = images_future.result()
```
- Access embeddings for similarity search
+ Access embeddings for similarity search:
```python
# Get all embeddings
- embeddings = dataset["embeddings"][:]
+ embeddings = ds["embeddings"][:]
# Get specific embeddings by indices
- selected = dataset["embeddings"][[1, 5, 10]]
+ selected = ds["embeddings"][[1, 5, 10]]
```
- Check column properties
+ Check column properties:
```python
# Get column name
- name = dataset["images"].name
+ name = ds["images"].name
# Access metadata
- if "mean" in dataset["images"].metadata:
+ if "mean" in ds["images"].metadata.keys():
mean = dataset["images"].metadata["mean"]
```
"""
@@ -682,6 +960,16 @@ class ColumnView:
Returns:
The data at the specified index/indices. Type depends on the column's data type.
+
+
Examples:
```python
# Get single item
@@ -710,14 +998,26 @@ class ColumnView:
Returns:
Future: A Future object that resolves to the requested data.
+
+
Examples:
```python
# Async batch load
- future = column.get_async(0:32)
+ future = column.get_async(slice(0, 32))
batch = future.result()
# Using with async/await
- batch = await column.get_async(0:32)
+ async def load_batch():
+ batch = await column.get_async(slice(0, 32))
+ return batch
```
"""
...
@@ -750,6 +1050,20 @@ class ColumnView:
Access the column's metadata. Useful for storing statistics, preprocessing parameters,
or other information about the column data.
+ Returns:
+ ReadOnlyMetadata: A ReadOnlyMetadata object for reading metadata.
+
+
+
Examples:
```python
# Access preprocessing parameters
@@ -786,32 +1100,49 @@ class Column(ColumnView):
- Access and modify column metadata
- Handle various data types common in ML: images, embeddings, labels, etc.
+
+
Examples:
- Update training labels
+ Update training labels:
```python
# Update single label
- dataset["labels"][0] = 1
+ ds["labels"][0] = 1
# Update batch of labels
- dataset["labels"][0:32] = new_labels
+ ds["labels"][0:32] = new_labels
# Async update for better performance
- future = dataset["labels"].set_async(0:32, new_labels)
+ future = ds["labels"].set_async(slice(0, 32), new_labels)
future.wait()
```
- Store image embeddings
+ Store image embeddings:
```python
# Generate and store embeddings
embeddings = model.encode(images)
- dataset["embeddings"][0:len(embeddings)] = embeddings
+ ds["embeddings"][0:len(embeddings)] = embeddings
```
- Manage column metadata
+ Manage column metadata:
```python
# Store preprocessing parameters
- dataset["images"].metadata["mean"] = [0.485, 0.456, 0.406]
- dataset["images"].metadata["std"] = [0.229, 0.224, 0.225]
+ ds["images"].metadata["mean"] = [0.485, 0.456, 0.406]
+ ds["images"].metadata["std"] = [0.229, 0.224, 0.225]
```
"""
@@ -825,6 +1156,18 @@ class Column(ColumnView):
- slice: Range of indices (e.g., 0:10)
value: The data to store. Must match the column's data type.
+
+
Examples:
```python
# Update single item
@@ -850,14 +1193,27 @@ class Column(ColumnView):
Returns:
FutureVoid: A FutureVoid that completes when the update is finished.
+
+
Examples:
```python
# Async batch update
- future = column.set_async(0:32, new_batch)
+ future = column.set_async(slice(0, 32), new_batch)
future.wait()
# Using with async/await
- await column.set_async(0:32, new_batch)
+ async def update_batch():
+ await column.set_async(slice(0, 32), new_batch)
```
"""
...
@@ -926,6 +1282,16 @@ class Row:
Returns:
Future: A Future object that will resolve to the value containing the column data.
+
+
Examples:
```python
future = row.get_async("column_name")
@@ -955,6 +1321,17 @@ class Row:
Returns:
FutureVoid: A FutureVoid object that will resolve when the operation is complete.
+
+
Examples:
```python
future_void = row.set_async("column_name", new_value)
@@ -1004,6 +1381,16 @@ class RowRange:
Returns:
Future: A Future object that will resolve to the value containing the column data.
+
+
Examples:
```python
future = row_range.get_async("column_name")
@@ -1033,6 +1420,17 @@ class RowRange:
Returns:
FutureVoid: A FutureVoid object that will resolve when the operation is complete.
+
+
Examples:
```python
future_void = row_range.set_async("column_name", new_value)
@@ -1085,6 +1483,16 @@ class RowRangeView:
Returns:
Future: A Future object that will resolve to the value containing the column data.
+
+
Examples:
```python
future = row_range_view.get_async("column_name")
@@ -1118,6 +1526,16 @@ class RowView:
Returns:
Future: A Future object that will resolve to the value containing the column data.
+
+
Examples:
```python
future = row_view.get_async("column_name")
@@ -1221,7 +1639,6 @@ class DatasetView:
# process row
pass
```
-
"""
...
@@ -1239,15 +1656,6 @@ class DatasetView:
```python
ds.summary()
```
-
- Example Output:
- ```
- Dataset length: 5
- Columns:
- id : int64
- title : text
- embedding: embedding(768)
- ```
"""
...
@@ -1255,13 +1663,20 @@ class DatasetView:
"""
Executes the given TQL query against the dataset and return the results as a [deeplake.DatasetView][].
+
+
Examples:
```python
result = ds.query("select * where category == 'active'")
for row in result:
print("Id is: ", row["id"])
```
-
"""
...
@@ -1269,6 +1684,14 @@ class DatasetView:
"""
Asynchronously executes the given TQL query against the dataset and return a future that will resolve into [deeplake.DatasetView][].
+
+
Examples:
```python
future = ds.query_async("select * where category == 'active'")
@@ -1276,11 +1699,12 @@ class DatasetView:
for row in result:
print("Id is: ", row["id"])
- # or use the Future in an await expression
- future = ds.query_async("select * where category == 'active'")
- result = await future
- for row in result:
- print("Id is: ", row["id"])
+ async def query_and_process():
+ # or use the Future in an await expression
+ future = ds.query_async("select * where category == 'active'")
+ result = await future
+ for row in result:
+ print("Id is: ", row["id"])
```
"""
...
@@ -1304,14 +1728,23 @@ class DatasetView:
Raises:
ImportError: If TensorFlow is not installed
+
+
Examples:
```python
- ds = deeplake.open("path/to/dataset")
- dl = ds.tensorflow().shuffle(500).batch(32).
- for i_batch, sample_batched in enumerate(dataloader):
+ dl = ds.tensorflow().shuffle(500).batch(32)
+ for i_batch, sample_batched in enumerate(dl):
process_batch(sample_batched)
```
-
"""
...
@@ -1327,17 +1760,27 @@ class DatasetView:
Raises:
ImportError: If pytorch is not installed
+
+
Examples:
```python
from torch.utils.data import DataLoader
- ds = deeplake.open("path/to/dataset")
- dataloader = DataLoader(ds.pytorch(), batch_size=60,
- shuffle=True, num_workers=10)
- for i_batch, sample_batched in enumerate(dataloader):
- process_batch(sample_batched)
+ dl = DataLoader(ds.pytorch(), batch_size=60,
+ shuffle=True, num_workers=8)
+ for i_batch, sample_batched in enumerate(dl):
+ process_batch(sample_batched)
```
-
"""
...
@@ -1349,8 +1792,8 @@ class DatasetView:
batch_size: Number of rows in each batch
drop_last: Whether to drop the final batch if it is incomplete
- Examples:
- ```python
+        Examples:
+ ```python
ds = deeplake.open("al://my_org/dataset")
batches = ds.batches(batch_size=2000, drop_last=True)
for batch in batches:
@@ -1479,6 +1922,15 @@ class Dataset(DatasetView):
- `tuple`: A tuple of indices specifying the rows to return. Returns a [deeplake.RowRange][]
- `str`: A string specifying column to return all values from. Returns a [deeplake.Column][]
+
+
Examples:
```python
row = ds[318]
@@ -1493,7 +1945,6 @@ class Dataset(DatasetView):
column_data = ds["id"]
```
-
"""
...
@@ -1507,7 +1958,6 @@ class Dataset(DatasetView):
# process row
pass
```
-
"""
...
@@ -1550,7 +2000,7 @@ class Dataset(DatasetView):
```python
ds.add_column("labels", deeplake.types.Int32)
- ds.add_column("labels", "int32")
+ ds.add_column("categories", "int32")
ds.add_column("name", deeplake.types.Text())
@@ -1558,7 +2008,7 @@ class Dataset(DatasetView):
ds.add_column("images", deeplake.types.Image(dtype=deeplake.types.UInt8(), sample_compression="jpeg"))
- ds.add_column("embedding", deeplake.types.Embedding(dtype=deeplake.types.Float32(), dimensions=768))
+ ds.add_column("embedding", deeplake.types.Embedding(size=768))
```
Raises:
@@ -1572,6 +2022,14 @@ class Dataset(DatasetView):
Args:
name: The name of the column to remove
+
+
Examples:
```python
ds.remove_column("name")
@@ -1589,6 +2047,14 @@ class Dataset(DatasetView):
name: The name of the column to rename
new_name: The new name to set to column
+
+
Examples:
```python
ds.rename_column("old_name", "new_name")
@@ -1620,21 +2086,37 @@ class Dataset(DatasetView):
Args:
data: The data to insert into the dataset.
+
+
Examples:
```python
ds.append({"name": ["Alice", "Bob"], "age": [25, 30]})
ds.append([{"name": "Alice", "age": 25}, {"name": "Bob", "age": 30}])
+ ```
- ds.append({
+ ```python
+ ds2.append({
"embedding": np.random.rand(4, 768),
"text": ["Hello World"] * 4})
- ds.append([{"embedding": np.random.rand(768), "text": "Hello World"}] * 4)
+ ds2.append([{"embedding": np.random.rand(768), "text": "Hello World"}] * 4)
```
```python
- ds.append(deeplake.from_parquet("./file.parquet"))
+ ds2.append(deeplake.from_parquet("./file.parquet"))
```
Raises:
@@ -1662,12 +2144,9 @@ class Dataset(DatasetView):
Examples:
```python
ds.commit()
- ```
- ```python
ds.commit("Added data from updated documents")
```
-
"""
def commit_async(self, message: str | None = None) -> FutureVoid:
@@ -1682,21 +2161,12 @@ class Dataset(DatasetView):
Examples:
```python
ds.commit_async().wait()
- ```
- ```python
ds.commit_async("Added data from updated documents").wait()
- ```
- ```python
- await ds.commit_async()
- ```
-
- ```python
- await ds.commit_async("Added data from updated documents")
- ```
+ async def do_commit():
+ await ds.commit_async()
- ```python
future = ds.commit_async() # then you can check if the future is completed using future.is_completed()
```
"""
@@ -1807,7 +2277,6 @@ class ReadOnlyDataset(DatasetView):
# process row
pass
```
-
"""
...
@@ -1953,6 +2422,12 @@ class InvalidPolygonShapeError(Exception):
class InvalidLinkDataError(Exception):
pass
+class InvalidCredsKeyAssignmentError(Exception):
+ pass
+
+class CredsKeyAlreadyAssignedError(Exception):
+ pass
+
class GcsStorageProviderFailed(Exception):
pass
@@ -2055,12 +2530,6 @@ class UnsupportedChunkCompression(Exception):
class InvalidImageCompression(Exception):
pass
-class InvalidCredsKeyAssignmentError(Exception):
- pass
-
-class CredsKeyAlreadyAssignedError(Exception):
- pass
-
class InvalidSegmentMaskCompression(Exception):
pass
@@ -2206,11 +2675,21 @@ def create(
token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
schema (dict): The initial schema to use for the dataset. See `deeplake.schema` such as [deeplake.schemas.TextEmbeddings][] for common starting schemas.
+
+
Examples:
```python
- import deeplake
- from deeplake import types
-
# Create a dataset in your local filesystem:
ds = deeplake.create("directory_path")
ds.add_column("id", types.Int32())
@@ -2219,42 +2698,23 @@ def create(
ds.commit()
ds.summary()
```
- Output:
- ```
- Dataset length: 0
- Columns:
- id : int32
- url : text
- embedding: embedding(768)
- ```
```python
# Create dataset in your app.activeloop.ai organization:
ds = deeplake.create("al://organization_id/dataset_name")
- ```
- ```python
# Create a dataset stored in your cloud using specified credentials:
ds = deeplake.create("s3://mybucket/my_dataset",
- creds = {"aws_access_key_id": ..., ...})
- ```
+ creds = {"aws_access_key_id": id, "aws_secret_access_key": key})
- ```python
# Create dataset stored in your cloud using app.activeloop.ai managed credentials.
ds = deeplake.create("s3://mybucket/my_dataset",
creds = {"creds_key": "managed_creds_key"}, org_id = "my_org_id")
- ```
- ```python
- # Create dataset stored in your cloud using app.activeloop.ai managed credentials.
ds = deeplake.create("azure://bucket/path/to/dataset")
- ```
- ```python
ds = deeplake.create("gcs://bucket/path/to/dataset")
- ```
- ```python
ds = deeplake.create("mem://in-memory")
```
@@ -2275,54 +2735,50 @@ def create_async(
To open an existing dataset, use [deeplake.open_async][].
+
+
Examples:
```python
- import deeplake
- from deeplake import types
-
- # Asynchronously create a dataset in your local filesystem:
- ds = await deeplake.create_async("directory_path")
- await ds.add_column("id", types.Int32())
- await ds.add_column("url", types.Text())
- await ds.add_column("embedding", types.Embedding(768))
- await ds.commit()
- await ds.summary() # Example of usage in an async context
- ```
+ async def create_dataset():
+ # Asynchronously create a dataset in your local filesystem:
+ ds = await deeplake.create_async("directory_path")
+ await ds.add_column("id", types.Int32())
+ await ds.add_column("url", types.Text())
+ await ds.add_column("embedding", types.Embedding(768))
+ await ds.commit()
+ await ds.summary() # Example of usage in an async context
- ```python
- # Alternatively, create a dataset using .result().
- future_ds = deeplake.create_async("directory_path")
- ds = future_ds.result() # Blocks until the dataset is created
- ```
+ # Alternatively, create a dataset using .result().
+ future_ds = deeplake.create_async("directory_path")
+ ds = future_ds.result() # Blocks until the dataset is created
- ```python
- # Create a dataset in your app.activeloop.ai organization:
- ds = await deeplake.create_async("al://organization_id/dataset_name")
- ```
+ # Create a dataset in your app.activeloop.ai organization:
+ ds = await deeplake.create_async("al://organization_id/dataset_name")
- ```python
- # Create a dataset stored in your cloud using specified credentials:
- ds = await deeplake.create_async("s3://mybucket/my_dataset",
- creds={"aws_access_key_id": ..., ...})
- ```
+ # Create a dataset stored in your cloud using specified credentials:
+ ds = await deeplake.create_async("s3://mybucket/my_dataset",
+ creds={"aws_access_key_id": id, "aws_secret_access_key": key})
- ```python
- # Create dataset stored in your cloud using app.activeloop.ai managed credentials.
- ds = await deeplake.create_async("s3://mybucket/my_dataset",
- creds={"creds_key": "managed_creds_key"}, org_id="my_org_id")
- ```
+ # Create dataset stored in your cloud using app.activeloop.ai managed credentials.
+ ds = await deeplake.create_async("s3://mybucket/my_dataset",
+ creds={"creds_key": "managed_creds_key"}, org_id="my_org_id")
- ```python
- # Create dataset stored in your cloud using app.activeloop.ai managed credentials.
- ds = await deeplake.create_async("azure://bucket/path/to/dataset")
- ```
+ ds = await deeplake.create_async("azure://bucket/path/to/dataset")
- ```python
- ds = await deeplake.create_async("gcs://bucket/path/to/dataset")
- ```
+ ds = await deeplake.create_async("gcs://bucket/path/to/dataset")
- ```python
- ds = await deeplake.create_async("mem://in-memory")
+ ds = await deeplake.create_async("mem://in-memory")
```
Raises:
@@ -2348,11 +2804,18 @@ def copy(
dst_creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the destination dataset at the path.
token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
+
+
Examples:
```python
deeplake.copy("al://organization_id/source_dataset", "al://organization_id/destination_dataset")
```
-
"""
def delete(
@@ -2388,7 +2851,7 @@ def open(
See [deeplake.open_read_only][] for opening the dataset in read only mode
- To create a new dataset, see [deeplake.open][]
+ To create a new dataset, see [deeplake.create][]
Args:
url: The URL of the dataset. URLs can be specified using the following protocols:
@@ -2410,33 +2873,32 @@ def open(
- If nothing is given is, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets
token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
+
+
Examples:
```python
# Load dataset managed by Deep Lake.
ds = deeplake.open("al://organization_id/dataset_name")
- ```
- ```python
# Load dataset stored in your cloud using your own credentials.
ds = deeplake.open("s3://bucket/my_dataset",
- creds = {"aws_access_key_id": ..., ...})
- ```
+ creds = {"aws_access_key_id": id, "aws_secret_access_key": key})
- ```python
# Load dataset stored in your cloud using Deep Lake managed credentials.
ds = deeplake.open("s3://bucket/my_dataset",
- ...creds = {"creds_key": "managed_creds_key"}, org_id = "my_org_id")
- ```
+ creds = {"creds_key": "managed_creds_key"}, org_id = "my_org_id")
- ```python
ds = deeplake.open("s3://bucket/path/to/dataset")
- ```
- ```python
ds = deeplake.open("azure://bucket/path/to/dataset")
- ```
- ```python
ds = deeplake.open("gcs://bucket/path/to/dataset")
```
"""
@@ -2451,38 +2913,27 @@ def open_async(
Examples:
```python
- # Asynchronously load dataset managed by Deep Lake using await.
- ds = await deeplake.open_async("al://organization_id/dataset_name")
- ```
+ async def async_open():
+ # Asynchronously load dataset managed by Deep Lake using await.
+ ds = await deeplake.open_async("al://organization_id/dataset_name")
- ```python
- # Asynchronously load dataset stored in your cloud using your own credentials.
- ds = await deeplake.open_async("s3://bucket/my_dataset",
- creds={"aws_access_key_id": ..., ...})
- ```
+ # Asynchronously load dataset stored in your cloud using your own credentials.
+ ds = await deeplake.open_async("s3://bucket/my_dataset",
+ creds={"aws_access_key_id": id, "aws_secret_access_key": key})
- ```python
- # Asynchronously load dataset stored in your cloud using Deep Lake managed credentials.
- ds = await deeplake.open_async("s3://bucket/my_dataset",
- creds={"creds_key": "managed_creds_key"}, org_id="my_org_id")
- ```
+ # Asynchronously load dataset stored in your cloud using Deep Lake managed credentials.
+ ds = await deeplake.open_async("s3://bucket/my_dataset",
+ creds={"creds_key": "managed_creds_key"}, org_id="my_org_id")
- ```python
- ds = await deeplake.open_async("s3://bucket/path/to/dataset")
- ```
+ ds = await deeplake.open_async("s3://bucket/path/to/dataset")
- ```python
- ds = await deeplake.open_async("azure://bucket/path/to/dataset")
- ```
+ ds = await deeplake.open_async("azure://bucket/path/to/dataset")
- ```python
- ds = await deeplake.open_async("gcs://bucket/path/to/dataset")
- ```
+ ds = await deeplake.open_async("gcs://bucket/path/to/dataset")
- ```python
- # Alternatively, load the dataset using .result().
- future_ds = deeplake.open_async("al://organization_id/dataset_name")
- ds = future_ds.result() # Blocks until the dataset is loaded
+ # Alternatively, load the dataset using .result().
+ future_ds = deeplake.open_async("al://organization_id/dataset_name")
+ ds = future_ds.result() # Blocks until the dataset is loaded
```
"""
@@ -2509,12 +2960,18 @@ def like(
- If nothing is given is, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets
token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
+
+
Examples:
```python
ds = deeplake.like(src="az://bucket/existing/to/dataset",
dest="s3://bucket/new/dataset")
```
-
"""
def connect(
@@ -2538,33 +2995,30 @@ def connect(
creds_key (str, optional): The creds_key of the managed credentials that will be used to access the source path. If not set, use the organization's default credentials.
token (str, optional): Activeloop token used to fetch the managed credentials.
+
+
Examples:
```python
ds = deeplake.connect("s3://bucket/path/to/dataset",
"al://my_org/dataset")
- ```
- ```python
ds = deeplake.connect("s3://bucket/path/to/dataset",
"al://my_org/dataset", creds_key="my_key")
- ```
- ```python
# Connect the dataset as al://my_org/dataset
ds = deeplake.connect("s3://bucket/path/to/dataset",
org_id="my_org")
- ```
- ```python
ds = deeplake.connect("az://bucket/path/to/dataset",
"al://my_org/dataset", creds_key="my_key")
- ```
- ```python
ds = deeplake.connect("gcs://bucket/path/to/dataset",
"al://my_org/dataset", creds_key="my_key")
- ```
-
"""
def disconnect(url: str, token: str | None = None) -> None:
@@ -2584,7 +3038,6 @@ def disconnect(url: str, token: str | None = None) -> None:
```python
deeplake.disconnect("al://my_org/dataset_name")
```
-
"""
def open_read_only(
@@ -2618,39 +3071,26 @@ def open_read_only(
token (str, optional): Activeloop token to authenticate user.
Examples:
- ```python
+
ds = deeplake.open_read_only("directory_path")
ds.summary()
- ```
Example Output:
- ```
Dataset length: 5
Columns:
id : int32
url : text
embedding: embedding(768)
- ```
- ```python
ds = deeplake.open_read_only("file:///path/to/dataset")
- ```
- ```python
ds = deeplake.open_read_only("s3://bucket/path/to/dataset")
- ```
- ```python
ds = deeplake.open_read_only("azure://bucket/path/to/dataset")
- ```
- ```python
ds = deeplake.open_read_only("gcs://bucket/path/to/dataset")
- ```
- ```python
ds = deeplake.open_read_only("mem://in-memory")
- ```
"""
def open_read_only_async(
@@ -2662,36 +3102,69 @@ def open_read_only_async(
See [deeplake.open_async][] for opening datasets for modification and [deeplake.open_read_only][] for sync open.
Examples:
- ```python
+
# Asynchronously open a dataset in read-only mode:
ds = await deeplake.open_read_only_async("directory_path")
- ```
- ```python
# Alternatively, open the dataset using .result().
future_ds = deeplake.open_read_only_async("directory_path")
ds = future_ds.result() # Blocks until the dataset is loaded
- ```
- ```python
ds = await deeplake.open_read_only_async("file:///path/to/dataset")
- ```
- ```python
ds = await deeplake.open_read_only_async("s3://bucket/path/to/dataset")
- ```
- ```python
ds = await deeplake.open_read_only_async("azure://bucket/path/to/dataset")
- ```
- ```python
ds = await deeplake.open_read_only_async("gcs://bucket/path/to/dataset")
- ```
- ```python
ds = await deeplake.open_read_only_async("mem://in-memory")
+ """
+
+def convert(
+ src: str,
+ dst: str,
+ dst_creds: dict[str, str] | None = None,
+ token: str | None = None
+) -> None:
+ """
+ Converts a Deep Lake v3 dataset to the new v4 format while preserving data and metadata.
+ Optimized for ML workloads with efficient handling of large datasets and linked data.
+
+ Args:
+ src: URL of the source v3 dataset to convert
+ dst: Destination URL for the new v4 dataset. Supports:
+ - `file://path` local storage
+ - `s3://bucket/path` S3 storage
+ - `gs://bucket/path` Google Cloud storage
+ - `azure://bucket/path` Azure storage
+ dst_creds: Optional credentials for accessing the destination storage.
+ Supports cloud provider credentials like access keys
+ token: Optional Activeloop authentication token
+
+    <!-- test-context
+ ```python
+ import deeplake
+ deeplake.convert = lambda src, dst, dst_creds = None, token = None: None
+ ```
+ -->
+
+ Examples:
+ ```python
+ # Convert local dataset
+ deeplake.convert("old_dataset/", "new_dataset/")
+
+ # Convert cloud dataset with credentials
+ deeplake.convert(
+ "s3://old-bucket/dataset",
+ "s3://new-bucket/dataset",
+ dst_creds={"aws_access_key_id": "key",
+ "aws_secret_access_key": "secret"}
+ )
```
+
+ Notes:
+    - You can open a v3 dataset without converting it to v4 using `deeplake.query('SELECT * FROM "old_dataset/"')`
"""
def from_parquet(url: str) -> ReadOnlyDataset:
@@ -2702,6 +3175,4 @@ def from_parquet(url: str) -> ReadOnlyDataset:
url: The URL of the Parquet dataset. If no protocol is specified, it assumes `file://`
"""
-def __child_atfork() -> None: ...
-def __parent_atfork() -> None: ...
def __prepare_atfork() -> None: ...
diff --git a/python/deeplake/ingestion/__init__.py b/python/deeplake/ingestion/__init__.py
new file mode 100644
index 0000000000..698af773c9
--- /dev/null
+++ b/python/deeplake/ingestion/__init__.py
@@ -0,0 +1 @@
+from deeplake.ingestion.coco.ingest_coco import ingest_coco
\ No newline at end of file
diff --git a/python/deeplake/ingestion/coco/__init__.py b/python/deeplake/ingestion/coco/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/deeplake/ingestion/coco/exceptions.py b/python/deeplake/ingestion/coco/exceptions.py
new file mode 100644
index 0000000000..4703dc9a4f
--- /dev/null
+++ b/python/deeplake/ingestion/coco/exceptions.py
@@ -0,0 +1,11 @@
+class CocoAnnotationMissingError(Exception):
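+    """Raised when one or more required COCO annotation keys are missing.
+
+    For illustration: ``CocoAnnotationMissingError(["stuff"])`` yields
+    "... Key stuff is missing.", while multiple keys yield
+    "... Keys stuff, keypoints are missing.".
+    """
+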
+ def __init__(self, keys):
+ super().__init__(
+ (
+ "COCO dataset ingestion expects to have `instances`, `keypoints` and `stuff`. "
+ "{} {} missing."
+ ).format(
+ f"Key {keys[0]}" if len(keys) == 1 else f"Keys {', '.join(keys)}",
+ "is" if len(keys) == 1 else "are",
+ )
+ )
diff --git a/python/deeplake/ingestion/coco/ingest_coco.py b/python/deeplake/ingestion/coco/ingest_coco.py
new file mode 100644
index 0000000000..f3fe86db84
--- /dev/null
+++ b/python/deeplake/ingestion/coco/ingest_coco.py
@@ -0,0 +1,366 @@
+from typing import Union, Optional, Dict
+import pathlib
+from deeplake.ingestion.coco.exceptions import CocoAnnotationMissingError
+import deeplake as dp
+import numpy as np
+from tqdm import tqdm
+import os
+
+COCO_REQUIRED_KEYS = ["instances", "keypoints", "stuff"]
+MASKS_NOTE = "All segmentation polygons and RLEs were converted to stacked binary masks"
+
+
+def convert_pathlib_to_string_if_needed(path: Union[str, pathlib.Path]) -> str:
+ if isinstance(path, pathlib.Path):
+ path = str(path)
+ return path
+
+
+def verify_coco_annotation_dict(
+ annotation_files: Dict[str, Union[str, pathlib.Path]] = {}
+):
+ if all(key in annotation_files for key in COCO_REQUIRED_KEYS):
+ return {
+ key: convert_pathlib_to_string_if_needed(value)
+ for key, value in annotation_files.items()
+ }
+ else:
+        # COCO_REQUIRED_KEYS is a list, so convert to a set before differencing.
+        raise CocoAnnotationMissingError(
+            list(set(COCO_REQUIRED_KEYS) - annotation_files.keys())
+        )
+
+
+class COCOStructuredDataset:
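+    """Builds the Deep Lake schema for COCO data and ingests the images
+    together with their `instances`, `keypoints`, and `stuff` annotations."""
+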
+ def __init__(
+ self,
+ dataset: dp.Dataset = None,
+ images_directory: Union[str, pathlib.Path] = None,
+ annotation_files: Dict[str, Union[str, pathlib.Path]] = {},
+ ):
+ from pycocotools.coco import COCO
+
+ self.dataset = dataset
+ self.images_directory = images_directory
+ self.annotation_files = annotation_files
+
+ self.coco = COCO(self.annotation_files["instances"])
+ self.coco_kp = COCO(self.annotation_files["keypoints"])
+ self.coco_stuff = COCO(self.annotation_files["stuff"])
+
+ self.category_info = self.coco.loadCats(self.coco.getCatIds())
+ self.category_info_kp = self.coco_kp.loadCats(self.coco_kp.getCatIds())
+ self.category_info_stuff = self.coco_stuff.loadCats(self.coco_stuff.getCatIds())
+ self.img_ids = sorted(self.coco.getImgIds()) # Image ids for uploading
+
+        self.cat_names = [category["name"] for category in self.category_info]
+        # Sort supercategory names so the label <-> class-name mapping is
+        # deterministic across runs (set iteration order is not).
+        self.super_cat_names = sorted(
+            {category["supercategory"] for category in self.category_info}
+        )
+        self.cat_names_kp = [category["name"] for category in self.category_info_kp]
+        self.super_cat_names_kp = sorted(
+            {category["supercategory"] for category in self.category_info_kp}
+        )
+        self.cat_names_stuff = [
+            category["name"] for category in self.category_info_stuff
+        ]
+        self.super_cat_names_stuff = sorted(
+            {category["supercategory"] for category in self.category_info_stuff}
+        )
+
+ def get_kp_group_data(self, height, width, anns_kp):
+ # Iterate through keypoints and parse each
+ categories_kp = np.zeros((len(anns_kp)))
+ supercats_kp = np.zeros((len(anns_kp)))
+ masks_kp = np.zeros((height, width, len(anns_kp)))
+ boxes_kp = np.zeros((len(anns_kp), 4))
+        keypoints_kp = np.zeros((51, len(anns_kp)))  # 17 COCO keypoints * (x, y, visibility)
+
+ for j, ann_kp in enumerate(anns_kp):
+ categories_kp[j] = self.cat_names_kp.index(
+ [
+ self.category_info_kp[i]["name"]
+ for i in range(len(self.category_info_kp))
+ if self.category_info_kp[i]["id"] == ann_kp["category_id"]
+ ][0]
+ )
+ supercats_kp[j] = self.super_cat_names_kp.index(
+ [
+ self.category_info_kp[i]["supercategory"]
+ for i in range(len(self.category_info_kp))
+ if self.category_info_kp[i]["id"] == ann_kp["category_id"]
+ ][0]
+ )
+            mask_kp = self.coco_kp.annToMask(ann_kp)  # Convert annotation to mask
+ masks_kp[:, :, j] = mask_kp
+ boxes_kp[j, :] = ann_kp["bbox"]
+ keypoints_kp[:, j] = np.array(ann_kp["keypoints"])
+
+ return categories_kp, supercats_kp, masks_kp, boxes_kp, keypoints_kp
+
+    def get_stuff_group_data(self, height, width, anns_stuff):
+ # Iterate through stuff and parse each
+ masks_stuff = np.zeros((height, width, len(anns_stuff)))
+ boxes_stuff = np.zeros((len(anns_stuff), 4))
+ categories_stuff = np.zeros((len(anns_stuff)))
+ areas_stuff = np.zeros((len(anns_stuff)))
+ iscrowds_stuff = np.zeros((len(anns_stuff)))
+ supercats_stuff = np.zeros((len(anns_stuff)))
+
+ for k, ann_stuff in enumerate(anns_stuff):
+            mask_stuff = self.coco_stuff.annToMask(ann_stuff)  # Convert annotation to mask
+            masks_stuff[:, :, k] = mask_stuff
+            boxes_stuff[k, :] = ann_stuff["bbox"]
+
+            # Brute-force lookup, making no assumptions about the ordering of category ids.
+ categories_stuff[k] = self.cat_names_stuff.index(
+ [
+ self.category_info_stuff[i]["name"]
+ for i in range(len(self.category_info_stuff))
+ if self.category_info_stuff[i]["id"] == ann_stuff["category_id"]
+ ][0]
+ )
+ supercats_stuff[k] = self.super_cat_names_stuff.index(
+ [
+ self.category_info_stuff[i]["supercategory"]
+ for i in range(len(self.category_info_stuff))
+ if self.category_info_stuff[i]["id"] == ann_stuff["category_id"]
+ ][0]
+ )
+
+ areas_stuff[k] = ann_stuff["area"]
+ iscrowds_stuff[k] = ann_stuff["iscrowd"]
+
+ if "segmentation" not in ann_stuff:
+ print("----No segmentation found. Exiting.------")
+ print("Annotation length: {}".format(len(anns_stuff)))
+ print("----image id: {}----".format(img_id))
+ print("----Exiting.------")
+
+ return (
+ masks_stuff,
+ boxes_stuff,
+ categories_stuff,
+ areas_stuff,
+ iscrowds_stuff,
+ supercats_stuff,
+ )
+
+ def create_structure(self):
+ self.dataset.add_column(
+ "images", dp.types.Image(dp.types.UInt8(), sample_compression="jpg")
+ )
+ self.dataset.add_column("masks", dp.types.BinaryMask(sample_compression="lz4"))
+        # COCO bounding boxes are stored as [left, top, width, height].
+        self.dataset.add_column(
+            "boxes", dp.types.BoundingBox(dp.types.Float32(), "LTWH", "pixel")
+        )
+ self.dataset.add_column(
+ "categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+ )
+ self.dataset["categories"].metadata["class_names"] = self.cat_names
+ self.dataset.add_column(
+ "super_categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+ )
+ self.dataset["super_categories"].metadata["class_names"] = self.super_cat_names
+ self.dataset.add_column("areas", dp.types.Array("uint32", 1))
+ self.dataset.add_column("iscrowds", dp.types.Array("bool", 1))
+ self.dataset.add_column("images_meta", dp.types.Dict())
+
+ # Pose
+ self.dataset.add_column(
+ "pose/categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+ )
+ self.dataset["pose/categories"].metadata["class_names"] = self.cat_names_kp
+ self.dataset.add_column(
+ "pose/super_categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+ )
+ self.dataset["pose/super_categories"].metadata[
+ "class_names"
+ ] = self.super_cat_names_kp
+ self.dataset.add_column(
+ "pose/boxes", dp.types.BoundingBox(dp.types.Float32(), "LTWH", "pixel")
+ )
+ self.dataset.add_column(
+ "pose/keypoints", dp.types.Array("int32", 2)
+ ) # htype="keypoints_coco"
+ self.dataset.add_column(
+ "pose/masks", dp.types.BinaryMask(sample_compression="lz4")
+ )
+
+ # Stuff
+ self.dataset.add_column(
+ "stuff/masks", dp.types.BinaryMask(sample_compression="lz4")
+ )
+ self.dataset.add_column(
+ "stuff/boxes", dp.types.BoundingBox(dp.types.Float32(), "LTWH", "pixel")
+ )
+ self.dataset.add_column(
+ "stuff/categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+ )
+ self.dataset["stuff/categories"].metadata["class_names"] = self.cat_names_stuff
+ self.dataset.add_column(
+ "stuff/super_categories", dp.types.ClassLabel(dp.types.Array("uint32", 1))
+ )
+ self.dataset["stuff/super_categories"].metadata[
+ "class_names"
+ ] = self.super_cat_names_stuff
+ self.dataset.add_column("stuff/areas", dp.types.Array("uint32", 1))
+ self.dataset.add_column("stuff/iscrowds", dp.types.Array("bool", 1))
+
+ # update metadatas
+ self.dataset["categories"].metadata["category_info"] = self.category_info
+ self.dataset["categories"].metadata[
+ "notes"
+ ] = "Numeric labels for categories represent the position of the class in the ds[categories].medatata['class_names'] list, and not the COCO category id."
+ self.dataset["super_categories"].metadata["category_info"] = self.category_info
+ self.dataset["super_categories"].metadata[
+ "notes"
+ ] = "Numeric labels for categories represent the position of the class in the ds[super_categories].medatata['class_names'] list, and not the COCO category id."
+
+ self.dataset["masks"].metadata["notes"] = MASKS_NOTE
+ self.dataset["pose/masks"].metadata["category_info"] = self.category_info_kp
+ self.dataset["pose/masks"].metadata["notes"] = MASKS_NOTE
+ self.dataset["pose/keypoints"].metadata["keypoints"] = [
+ category["keypoints"] for category in self.category_info_kp
+ ][0]
+ self.dataset["pose/keypoints"].metadata["connections"] = [
+ category["skeleton"] for category in self.category_info_kp
+ ][0]
+
+ self.dataset["stuff/masks"].metadata["category_info"] = self.category_info_stuff
+ self.dataset["stuff/masks"].metadata["notes"] = MASKS_NOTE
+
+ def ingest_columns(self):
+        for img_id in tqdm(self.img_ids):
+ ann_ids = self.coco.getAnnIds(img_id)
+ ann_ids_kp = self.coco_kp.getAnnIds(img_id)
+ ann_ids_stuff = self.coco_stuff.getAnnIds(img_id)
+ anns = self.coco.loadAnns(ann_ids)
+ anns_kp = self.coco_kp.loadAnns(ann_ids_kp)
+ anns_stuff = self.coco_stuff.loadAnns(ann_ids_stuff)
+
+ img_coco = self.coco.loadImgs(img_id)[0]
+ img_path = os.path.join(self.images_directory, img_coco["file_name"])
+ with open(img_path, "rb") as file:
+ image_bytes = file.read()
+ (height, width) = (img_coco["height"], img_coco["width"])
+ masks = np.zeros((height, width, len(anns)))
+ boxes = np.zeros((len(anns), 4))
+ categories = np.zeros((len(anns)))
+ areas = np.zeros((len(anns)))
+ iscrowds = np.zeros((len(anns)))
+ supercats = np.zeros((len(anns)))
+
+ for i, ann in enumerate(anns):
+ mask = self.coco.annToMask(ann)
+ masks[:, :, i] = mask
+ boxes[i, :] = ann["bbox"]
+
+ categories[i] = self.cat_names.index(
+ [
+ self.category_info[i]["name"]
+ for i in range(len(self.category_info))
+ if self.category_info[i]["id"] == ann["category_id"]
+ ][0]
+ )
+ supercats[i] = self.super_cat_names.index(
+ [
+ self.category_info[i]["supercategory"]
+ for i in range(len(self.category_info))
+ if self.category_info[i]["id"] == ann["category_id"]
+ ][0]
+ )
+
+ areas[i] = ann["area"]
+ iscrowds[i] = ann["iscrowd"]
+
+ if "segmentation" not in ann:
+ print("----No segmentation found. Exiting.------")
+ print("Annotation length: {}".format(len(anns)))
+ print("----image id: {}----".format(img_id))
+ print("----Exiting.------")
+
+ (categories_kp, supercats_kp, masks_kp, boxes_kp, keypoints_kp) = (
+ self.get_kp_group_data(height, width, anns_kp)
+ )
+
+ (
+ masks_stuff,
+ boxes_stuff,
+ categories_stuff,
+ areas_stuff,
+ iscrowds_stuff,
+ supercats_stuff,
+            ) = self.get_stuff_group_data(height, width, anns_stuff)
+
+ in_dict = {
+ "images": [image_bytes],
+ "images_meta": [img_coco],
+ "masks": [masks.astype("bool")],
+ "boxes": [boxes.astype("float32")],
+ "categories": [categories.astype("uint32")],
+ "super_categories": [supercats.astype("uint32")],
+ "areas": [areas.astype("uint32")],
+ "iscrowds": [iscrowds.astype("bool")],
+ "pose/categories": [categories_kp.astype("uint32")],
+ "pose/super_categories": [supercats_kp.astype("uint32")],
+ "pose/boxes": [boxes_kp.astype("float32")],
+ "pose/masks": [masks_kp.astype("bool")],
+ "pose/keypoints": [keypoints_kp.astype("int32")],
+ "stuff/masks": [masks_stuff.astype("bool")],
+ "stuff/boxes": [boxes_stuff.astype("float32")],
+ "stuff/categories": [categories_stuff.astype("uint32")],
+ "stuff/super_categories": [supercats_stuff.astype("uint32")],
+ "stuff/areas": [areas_stuff.astype("uint32")],
+ "stuff/iscrowds": [iscrowds_stuff.astype("bool")],
+ }
+ self.dataset.append(in_dict)
+ self.dataset.commit("Finished ingestion")
+
+ def structure(self):
+ self.create_structure()
+ self.ingest_columns()
+
+
+def ingest_coco(
+ images_directory: Union[str, pathlib.Path],
+ annotation_files: Dict[str, Union[str, pathlib.Path]],
+ dest: Union[str, pathlib.Path],
+ dest_creds: Optional[Dict[str, str]] = None,
+):
+ """Ingest images and annotations in COCO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
+
+ Args:
+ images_directory (str, pathlib.Path): The path to the directory containing images.
+        annotation_files (Dict[str, Union[str, pathlib.Path]]): Dictionary mapping annotation keys to paths of JSON annotation files in COCO format.
+            - the required keys are `instances`, `keypoints`, and `stuff`
+ dest (str, pathlib.Path):
+ - The full path to the dataset. Can be:
+            - a Deep Lake cloud path of the form ``al://org_id/datasetname``. To write to Deep Lake cloud datasets, ensure that you are authenticated to Deep Lake (e.g. by setting the ``ACTIVELOOP_TOKEN`` environment variable).
+            - an s3 path of the form ``s3://bucketname/path/to/dataset``. Credentials are required in either the environment or passed to the ``dest_creds`` argument.
+ - a local file system path of the form ``./path/to/dataset`` or ``~/path/to/dataset`` or ``path/to/dataset``.
+ - a memory path of the form ``mem://path/to/dataset`` which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
+ dest_creds (Optional[Dict[str, str]]): The dictionary containing credentials used to access the destination path of the dataset.
+
+ Returns:
+ Dataset: The Dataset created from images and COCO annotations.
+
+ Raises:
+        CocoAnnotationMissingError: If one or more required annotation keys are missing from ``annotation_files``.
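+
+    Examples:
+        A minimal sketch (the local paths below are hypothetical):
+
+            ds = ingest_coco(
+                images_directory="coco/train2017",
+                annotation_files={
+                    "instances": "annotations/instances_train2017.json",
+                    "keypoints": "annotations/person_keypoints_train2017.json",
+                    "stuff": "annotations/stuff_train2017.json",
+                },
+                dest="./coco_deeplake",
+            )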
+ """
+
+ dest = convert_pathlib_to_string_if_needed(dest)
+ images_directory = convert_pathlib_to_string_if_needed(images_directory)
+
+ annotation_files = verify_coco_annotation_dict(annotation_files)
+
+    dest_ds = dp.create(dest, creds=dict(dest_creds) if dest_creds is not None else {})
+
+ unstructured = COCOStructuredDataset(
+        dataset=dest_ds,
+ images_directory=images_directory,
+ annotation_files=annotation_files,
+ )
+
+ unstructured.structure()
+
+    return dest_ds
diff --git a/python/deeplake/integrations/__init__.py b/python/deeplake/integrations/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/deeplake/integrations/constants.py b/python/deeplake/integrations/constants.py
new file mode 100644
index 0000000000..32b2f7dc15
--- /dev/null
+++ b/python/deeplake/integrations/constants.py
@@ -0,0 +1,5 @@
+# Interval, in seconds, between periodic CUDA (GPU) memory cleanups.
+TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING = 10 * 60
+
+# Name of the environment variable that holds the Activeloop authentication token.
+DEEPLAKE_AUTH_TOKEN = "ACTIVELOOP_TOKEN"
diff --git a/python/deeplake/integrations/mm/__init__.py b/python/deeplake/integrations/mm/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/deeplake/integrations/mm/exceptions.py b/python/deeplake/integrations/mm/exceptions.py
new file mode 100644
index 0000000000..dffa82406c
--- /dev/null
+++ b/python/deeplake/integrations/mm/exceptions.py
@@ -0,0 +1,24 @@
+class EmptyTokenException(Exception):
+ def __init__(self, message="The authentication token is empty."):
+ super().__init__(message)
+
+
+class ValidationDatasetMissingError(Exception):
+ def __init__(self):
+ msg = (
+ "Validation dataset is not specified even though validate = True. "
+ "Please set validate = False or specify a validation dataset."
+ )
+ super().__init__(msg)
+
+
+class InvalidImageError(Exception):
+ def __init__(self, column_name, ex):
+ msg = f"Error on {column_name} data getting: {str(ex)}"
+ super().__init__(msg)
+
+
+class InvalidSegmentError(Exception):
+ def __init__(self, column_name, ex):
+ msg = f"Error on {column_name} data getting: {str(ex)}"
+ super().__init__(msg)
diff --git a/python/deeplake/integrations/mm/get_indexes.py b/python/deeplake/integrations/mm/get_indexes.py
new file mode 100644
index 0000000000..74ae89bba7
--- /dev/null
+++ b/python/deeplake/integrations/mm/get_indexes.py
@@ -0,0 +1,66 @@
+import math
+from typing import Optional
+
+
+def get_indexes(
+ dataset,
+ rank: Optional[int] = None,
+ num_replicas: Optional[int] = None,
+ drop_last: Optional[bool] = None,
+):
+ """
+ Generates a slice for a given rank in a distributed setting, dividing
+ the dataset evenly across multiple replicas.
+
+ Parameters:
+ dataset (Dataset): The dataset to split across distributed replicas.
+ rank (Optional[int]): The rank of the current process. If not specified,
+ the function will use the distributed package to get the current rank.
+ num_replicas (Optional[int]): Total number of replicas (i.e., processes) involved in distributed training.
+ If not specified, the function will determine the number based on the world size.
+ drop_last (Optional[bool]): If True, drop the extra data not evenly divisible among replicas.
+ This is useful for maintaining equal batch sizes across replicas.
+
+ Returns:
+ slice: A slice object representing the start and end indices for the current rank's portion of the dataset.
+
+ Raises:
+ RuntimeError: If the distributed package is not available when `rank` or `num_replicas` are not specified.
+ ValueError: If the specified `rank` is out of range based on the number of replicas.
+
+ Notes:
+ This function requires the `torch.distributed` package to determine the number of replicas and
+ rank when they are not provided. It is useful in distributed data loading to ensure each process
+ gets a specific subset of the data.
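+
+    Example:
+        A minimal sketch with explicit ``rank`` and ``num_replicas`` (no
+        torch.distributed initialization is needed when both are given):
+
+        >>> get_indexes(list(range(10)), rank=1, num_replicas=4, drop_last=True)
+        slice(2, 4, None)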
+ """
+ import torch.distributed as dist
+
+ if num_replicas is None:
+ if not dist.is_available():
+ raise RuntimeError("Requires distributed package to be available")
+ num_replicas = dist.get_world_size()
+ if rank is None:
+ if not dist.is_available():
+ raise RuntimeError("Requires distributed package to be available")
+ rank = dist.get_rank()
+ if rank >= num_replicas or rank < 0:
+ raise ValueError(
+ "Invalid rank {}, rank should be in the interval"
+ " [0, {}]".format(rank, num_replicas - 1)
+ )
+
+ dataset_length = len(dataset)
+
+ if drop_last:
+ total_size = (dataset_length // num_replicas) * num_replicas
+ per_process = total_size // num_replicas
+ else:
+ per_process = math.ceil(dataset_length / num_replicas)
+ total_size = per_process * num_replicas
+
+ start_index = rank * per_process
+ end_index = min(start_index + per_process, total_size)
+
+ end_index = min(end_index, dataset_length)
+
+ return slice(start_index, end_index)
diff --git a/python/deeplake/integrations/mm/ipc.py b/python/deeplake/integrations/mm/ipc.py
new file mode 100644
index 0000000000..71e418db7f
--- /dev/null
+++ b/python/deeplake/integrations/mm/ipc.py
@@ -0,0 +1,6 @@
+import socketserver
+
+
+def _get_free_port() -> int:
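+    """Bind a throwaway TCPServer to port 0 so the OS assigns an unused port, then return that port."""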
+ with socketserver.TCPServer(("localhost", 0), None) as s: # type: ignore
+ return s.server_address[1]
diff --git a/python/deeplake/integrations/mm/mm_common.py b/python/deeplake/integrations/mm/mm_common.py
new file mode 100644
index 0000000000..dcaa5c639d
--- /dev/null
+++ b/python/deeplake/integrations/mm/mm_common.py
@@ -0,0 +1,220 @@
+import os
+import torch
+import warnings
+import mmcv # type: ignore
+import deeplake as dp
+from deeplake.types import TypeKind
+from deeplake.integrations.mm.warnings import always_warn
+from deeplake.integrations.mm.exceptions import EmptyTokenException
+from deeplake.integrations.constants import DEEPLAKE_AUTH_TOKEN
+
+
+def ddp_setup(rank: int, world_size: int, port: int):
+ """
+ Args:
+ rank: Unique identifier of each process
+ world_size: Total number of processes
+ port: Port number
+ """
+ os.environ["MASTER_ADDR"] = "localhost"
+ os.environ["MASTER_PORT"] = str(port)
+ torch.distributed.init_process_group(
+ backend="nccl", rank=rank, world_size=world_size
+ )
+
+
+def force_cudnn_initialization(device_id):
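+    """Run a throwaway conv2d on the given CUDA device to force cuDNN/CUDA context initialization up front."""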
+ dev = torch.device(f"cuda:{device_id}")
+ torch.nn.functional.conv2d(
+ torch.zeros(32, 32, 32, 32, device=dev), torch.zeros(32, 32, 32, 32, device=dev)
+ )
+
+
+def load_ds_from_cfg(cfg: mmcv.utils.config.ConfigDict):
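+    """Open the Deep Lake dataset described by a config node: path and credentials, plus optional commit, tag, or query."""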
+ creds = cfg.get("deeplake_credentials", {})
+ token = creds.get("token", None)
+ deeplake_commit = cfg.get("deeplake_commit")
+ deeplake_tag_id = cfg.get("deeplake_tag_id")
+ deeplake_query = cfg.get("deeplake_query")
+ token = token or os.environ.get(DEEPLAKE_AUTH_TOKEN)
+ if token is None:
+ raise EmptyTokenException()
+
+ try:
+ ds = dp.open_read_only(cfg.deeplake_path, token=token, creds=creds)
+ except:
+ if not deeplake_query:
+ raise
+ ds = dp.query(deeplake_query)
+
+ if deeplake_tag_id and deeplake_query:
+ raise Exception(
+ "A query and view_id were specified simultaneously for a dataset in the config. Please specify either the deeplake_query or the deeplake_tag_id."
+ )
+
+ if deeplake_commit:
+ ds.checkout(deeplake_commit)
+
+ if deeplake_tag_id:
+ ds = ds.tags(deeplake_tag_id).open()
+
+ if deeplake_query:
+ ds = ds.query(deeplake_query)
+
+ return ds
+
+
+def get_collect_keys(cfg):
+ pipeline = cfg.train_pipeline
+ for transform in pipeline:
+ if transform["type"] == "Collect":
+ return transform["keys"]
+ raise ValueError("collection keys were not specified")
+
+
+def check_persistent_workers(train_persistent_workers, val_persistent_workers):
+ if train_persistent_workers != val_persistent_workers:
+ if train_persistent_workers:
+ always_warn(
+ "persistent workers for training and evaluation should be identical, "
+ "otherwise, this could lead to performance issues. "
+ "Either both of then should be `True` or both of them should `False`. "
+ "If you want to use persistent workers set True for validation"
+ )
+ else:
+ always_warn(
+ "persistent workers for training and evaluation should be identical, "
+ "otherwise, this could lead to performance issues. "
+ "Either both of then should be `True` or both of them should `False`. "
+ "If you want to use persistent workers set True for training"
+ )
+
+
+def find_image_tensor(ds: dp.Dataset, mm_class=None):
+ images = [
+ col.name
+ for col in ds.schema.columns
+ if ds.schema[col.name].dtype.is_image
+ ]
+ if mm_class is not None:
+ always_warn(
+ f"No deeplake column name specified for '{mm_class} in config. Fetching it using type_kind '{TypeKind.Image}'."
+ )
+ if not images:
+ always_warn(f"No column found with type_kind='{TypeKind.Image}'")
+ return None
+ t = images[0]
+ if len(images) > 1:
+ always_warn(
+ f"Multiple columns with type_kind='{TypeKind.Image}' found. choosing '{t}'."
+ )
+ print(f"columns {images} kind {TypeKind.Image} mm_class {mm_class} t {t}")
+ return t
+
+
+def find_smask_tensor(ds: dp.Dataset, mm_class=None):
+ smasks = [
+ col.name
+ for col in ds.schema.columns
+ if ds.schema[col.name].dtype.is_segment_mask
+ ]
+ if mm_class is not None:
+ always_warn(
+ f"No deeplake column name specified for '{mm_class} in config. Fetching it using type_kind '{TypeKind.SegmentMask}'."
+ )
+ if not smasks:
+ always_warn(f"No column found with type_kind='{TypeKind.SegmentMask}'")
+ return None
+ t = smasks[0]
+ if len(smasks) > 1:
+ always_warn(
+ f"Multiple columns with type_kind='{TypeKind.SegmentMask}' found. choosing '{t}'."
+ )
+ print(f"columns {smasks} kind {TypeKind.SegmentMask} mm_class {mm_class} t {t}")
+ return t
+
+
+def find_tensor_with_htype(ds: dp.Dataset, type_kind=TypeKind.Image, mm_class=None):
+    columns = [col.name for col in ds.schema.columns if col.dtype.kind == type_kind]
+    if mm_class is not None:
+        always_warn(
+            f"No deeplake column name specified for '{mm_class}' in config. Fetching it using type_kind '{type_kind}'."
+        )
+    if not columns:
+        always_warn(f"No column found with type_kind='{type_kind}'")
+        return None
+    t = columns[0]
+    if len(columns) > 1:
+        always_warn(
+            f"Multiple columns with type_kind='{type_kind}' found. Choosing '{t}'."
+        )
+
+    print(f"columns {columns} kind {type_kind} mm_class {mm_class} t {t}")
+ return t
+
+
+def check_unsupported_functionalities(cfg):
+ check_unused_dataset_fields(cfg)
+ check_unsupported_train_pipeline_fields(cfg, mode="train")
+ check_unsupported_train_pipeline_fields(cfg, mode="val")
+ check_dataset_augmentation_formats(cfg)
+
+
+def check_unused_dataset_fields(cfg):
+ if cfg.get("dataset_type"):
+ always_warn(
+ "The deeplake mmdet integration does not use dataset_type to work with the data and compute metrics. All deeplake datasets are in the same deeplake format. To specify a metrics format, you should deeplake_metrics_format "
+ )
+
+ if cfg.get("data_root"):
+ always_warn(
+ "The deeplake mmdet integration does not use data_root, this input will be ignored"
+ )
+
+
+def check_unsupported_train_pipeline_fields(cfg, mode="train"):
+ transforms = cfg.data[mode].pipeline
+
+ for transform in transforms:
+ transform_type = transform.get("type")
+
+ if transform_type == "LoadImageFromFile":
+ always_warn(
+ "LoadImageFromFile is going to be skipped because deeplake mmdet integration does not use it"
+ )
+
+ if transform_type == "LoadAnnotations":
+ always_warn(
+ "LoadAnnotations is going to be skipped because deeplake mmdet integration does not use it"
+ )
+
+ if transform_type == "Corrupt":
+ raise Exception("Corrupt augmentation is not supported yet.")
+
+ elif transform_type == "CopyPaste": # TO DO: @adolkhan resolve this
+ raise Exception("CopyPaste augmentation is not supported yet")
+
+ elif transform_type == "CutOut": # TO DO: @adolkhan resolve this
+ raise Exception("CutOut augmentation is not supported yet")
+
+ elif transform_type == "Mosaic": # TO DO: @adolkhan resolve this
+ raise Exception("Mosaic augmentation is not supported yet")
+
+
+def check_dataset_augmentation_formats(cfg):
+ if cfg.get("train_dataset"):
+ always_warn(
+ "train_dataset is going to be unused. Dataset types like: ConcatDataset, RepeatDataset, ClassBalancedDataset, MultiImageMixDataset are not supported."
+ )
+
+
+def get_pipeline(cfg, *, name: str, generic_name: str):
+ pipeline = cfg.data[name].get("pipeline", None)
+ if pipeline is None:
+ warnings.warn(
+ f"Warning: The '{name}' data pipeline is missing in the configuration. Attempting to locate in '{generic_name}'."
+ )
+
+ pipeline = cfg.get(generic_name, [])
+
+ return pipeline
diff --git a/python/deeplake/integrations/mm/mm_runners.py b/python/deeplake/integrations/mm/mm_runners.py
new file mode 100644
index 0000000000..97b9786f10
--- /dev/null
+++ b/python/deeplake/integrations/mm/mm_runners.py
@@ -0,0 +1,152 @@
+import mmcv # type: ignore
+
+import torch
+import logging
+from mmcv import runner
+from torch.utils.data import DataLoader
+
+import time
+import warnings
+from typing import List, Tuple, Optional
+from deeplake.integrations.constants import TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING
+
+
+def empty_cuda():
+ try:
+ torch.cuda.empty_cache()
+ except Exception:
+ pass
+ return
+
+
+@runner.RUNNERS.register_module()
+class DeeplakeIterBasedRunner(runner.IterBasedRunner):
+ def __init__(self, **kwargs):
+ self.force_cleanup = kwargs.pop("force_cleanup", True)
+ super().__init__(**kwargs)
+
+ def run(
+ self,
+ data_loaders: List[DataLoader],
+ workflow: List[Tuple[str, int]],
+ max_iters: Optional[int] = None,
+ **kwargs,
+ ) -> None:
+ assert isinstance(data_loaders, list)
+ assert mmcv.is_list_of(workflow, tuple)
+ assert len(data_loaders) == len(workflow)
+ if max_iters is not None:
+ warnings.warn(
+ "setting max_iters in run is deprecated, "
+ "please set max_iters in runner_config",
+ DeprecationWarning,
+ )
+ self._max_iters = max_iters
+ assert (
+ self._max_iters is not None
+ ), "max_iters must be specified during instantiation"
+
+ work_dir = self.work_dir if self.work_dir is not None else "NONE"
+ self.logger.info(
+ "Start running, host: %s, work_dir: %s",
+ runner.utils.get_host_info(),
+ work_dir,
+ )
+ self.logger.info(
+ "Hooks will be executed in the following order:\n%s", self.get_hook_info()
+ )
+ self.logger.info("workflow: %s, max: %d iters", workflow, self._max_iters)
+ self.call_hook("before_run")
+
+ iter_loaders = [runner.IterLoader(x) for x in data_loaders]
+
+ self.call_hook("before_epoch")
+
+ formatter = logging.Formatter("%(relative)ss")
+ start_time = time.time()
+
+ while self.iter < self._max_iters:
+ for i, flow in enumerate(workflow):
+ self._inner_iter = 0
+ mode, iters = flow
+ if not isinstance(mode, str) or not hasattr(self, mode):
+ raise ValueError(
+ 'runner has no method named "{}" to run a workflow'.format(mode)
+ )
+ iter_runner = getattr(self, mode)
+ for _ in range(iters):
+ if mode == "train" and self.iter >= self._max_iters:
+ break
+
+ iter_time = time.time()
+
+ if (
+ self.force_cleanup
+ and iter_time - start_time
+ > TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING
+ ):
+ empty_cuda()
+ start_time = iter_time
+ iter_runner(iter_loaders[i], **kwargs)
+
+ time.sleep(1) # wait for some hooks like loggers to finish
+ self.call_hook("after_epoch")
+ self.call_hook("after_run")
+
+
+@runner.RUNNERS.register_module()
+class DeeplakeEpochBasedRunner(runner.EpochBasedRunner):
+ def __init__(self, **kwargs):
+ self.force_cleanup = kwargs.pop("force_cleanup", True)
+ super().__init__(**kwargs)
+
+ def train(self, data_loader, **kwargs):
+ start_time = time.time()
+ self.model.train()
+ self.mode = "train"
+ self.data_loader = data_loader
+ self._max_iters = self._max_epochs * len(self.data_loader)
+ self.call_hook("before_train_epoch")
+ time.sleep(2) # Prevent possible deadlock during epoch transition
+ for i, data_batch in enumerate(self.data_loader):
+ self.data_batch = data_batch
+ self._inner_iter = i
+ self.call_hook("before_train_iter")
+ self.run_iter(data_batch, train_mode=True, **kwargs)
+ self.call_hook("after_train_iter")
+ del self.data_batch
+ self._iter += 1
+ iter_time = time.time()
+ if (
+ self.force_cleanup
+ and iter_time - start_time > TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING
+ ):
+ empty_cuda()
+ start_time = iter_time
+
+ self.call_hook("after_train_epoch")
+ self._epoch += 1
+
+ @torch.no_grad()
+ def val(self, data_loader, **kwargs):
+ start_time = time.time()
+ self.model.eval()
+ self.mode = "val"
+ self.data_loader = data_loader
+ self.call_hook("before_val_epoch")
+ time.sleep(2) # Prevent possible deadlock during epoch transition
+ for i, data_batch in enumerate(self.data_loader):
+ self.data_batch = data_batch
+ self._inner_iter = i
+ self.call_hook("before_val_iter")
+ self.run_iter(data_batch, train_mode=False)
+ self.call_hook("after_val_iter")
+ del self.data_batch
+ iter_time = time.time()
+ if (
+ self.force_cleanup
+ and iter_time - start_time > TIME_INTERVAL_FOR_CUDA_MEMORY_CLEANING
+ ):
+ empty_cuda()
+ start_time = iter_time
+ self.call_hook("after_val_epoch")
diff --git a/python/deeplake/integrations/mm/upcast_array.py b/python/deeplake/integrations/mm/upcast_array.py
new file mode 100644
index 0000000000..8f94dcdb32
--- /dev/null
+++ b/python/deeplake/integrations/mm/upcast_array.py
@@ -0,0 +1,15 @@
+import numpy as np
+from typing import Union
+
+
+def upcast_array(arr: Union[np.ndarray, bytes]):
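+    """Recursively upcast unsigned integer arrays to signed dtypes.
+
+    Presumably for PyTorch interop (PyTorch historically lacks uint16/uint32/uint64
+    tensor dtypes): uint16 is widened to int32, and uint32/uint64 to int64. Lists
+    are processed element-wise; uint64 values above 2**63 - 1 will overflow int64.
+    """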
+ if isinstance(arr, list):
+ return [upcast_array(a) for a in arr]
+ if isinstance(arr, np.ndarray):
+ if arr.dtype == np.uint16:
+ return arr.astype(np.int32)
+ if arr.dtype == np.uint32:
+ return arr.astype(np.int64)
+ if arr.dtype == np.uint64:
+ return arr.astype(np.int64)
+ return arr
diff --git a/python/deeplake/integrations/mm/warnings.py b/python/deeplake/integrations/mm/warnings.py
new file mode 100644
index 0000000000..fc2193d487
--- /dev/null
+++ b/python/deeplake/integrations/mm/warnings.py
@@ -0,0 +1,7 @@
+import warnings
+
+
+def always_warn(*args, **kwargs):
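+    """Emit a warning with the "always" filter so it is shown on every call, not only the first."""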
+ with warnings.catch_warnings():
+ warnings.simplefilter("always")
+ warnings.warn(*args, **kwargs)
diff --git a/python/deeplake/integrations/mm/worker_init_fn.py b/python/deeplake/integrations/mm/worker_init_fn.py
new file mode 100644
index 0000000000..43c4282d80
--- /dev/null
+++ b/python/deeplake/integrations/mm/worker_init_fn.py
@@ -0,0 +1,21 @@
+import numpy as np
+import torch
+import random
+
+
+def worker_init_fn(worker_id, num_workers, rank, seed):
+ """Worker init func for dataloader.
+
+    The seed of each worker equals num_workers * rank + worker_id + seed
+
+ Args:
+ worker_id (int): Worker id.
+ num_workers (int): Number of workers.
+ rank (int): The rank of current process.
+ seed (int): The random seed to use.
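+
+    Example:
+        >>> worker_init_fn(worker_id=1, num_workers=4, rank=2, seed=7)  # seeds numpy, random and torch with 4*2 + 1 + 7 = 16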
+ """
+
+ worker_seed = num_workers * rank + worker_id + seed
+ np.random.seed(worker_seed)
+ random.seed(worker_seed)
+ torch.manual_seed(worker_seed)
diff --git a/python/deeplake/integrations/mmdet/__init__.py b/python/deeplake/integrations/mmdet/__init__.py
new file mode 100644
index 0000000000..3731638a14
--- /dev/null
+++ b/python/deeplake/integrations/mmdet/__init__.py
@@ -0,0 +1,2 @@
+from deeplake.integrations.mmdet.mmdet_ import train_detector
+from mmdet.models import build_detector # type: ignore
diff --git a/python/deeplake/integrations/mmdet/mmdet_.py b/python/deeplake/integrations/mmdet/mmdet_.py
new file mode 100644
index 0000000000..32a9e79573
--- /dev/null
+++ b/python/deeplake/integrations/mmdet/mmdet_.py
@@ -0,0 +1,813 @@
+"""
+Deep Lake offers an integration with MMDetection, a popular open-source object detection toolbox based on PyTorch.
+The integration enables users to train models while streaming Deep Lake dataset using the transformation, training, and evaluation tools built by MMDet.
+
+Learn more about MMDetection `here <https://github.com/open-mmlab/mmdetection>`_.
+
+Integration Interface
+~~~~~~~~~~~~~~~~~~~~~
+MMDetection works with configs. Deeplake adopted this strategy, and in order to train MMDet models, you need to create/specify your model
+and training/validation configs. The Deep Lake integration's logic is almost the same as MMDetection's, with some minor modifications. The integration
+with MMDet lives in the deeplake.integrations.mmdet module. At a high level, Deep Lake is responsible for the pytorch dataloader that streams data
+to the training framework, while MMDet is used for the training, transformation, and evaluation logic. Let us take a look at the config with deeplake changes:
+
+Deeplake integration requires the following parameters to be specified in the configuration file:
+
+- ``data``: Just like in the MMDetection configuration files, in data dictionary you can specify everything that you want to be applied to the data during training and validation
+ - ``train``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the training data
+ - ``val``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the validation data
+ - ``pipeline``: List of transformations. This parameter exists for train as well as for val.
+
+ - Example:
+
+ >>> pipeline = [dict(type="Resize", img_scale=[(320, 320), (608, 608)], keep_ratio=True), dict(type="RandomFlip", flip_ratio=0.5), dict(type="PhotoMetricDistortion")]
+
+ - ``deeplake_path``: Path to the deeplake dataset. This parameter exists for train as well as for val.
+    - ``deeplake_credentials``: Optional parameter. Required only when using private nonlocal datasets. See the documentation for `deeplake.open_read_only() <https://docs.deeplake.ai/latest/api/dataset/#deeplake.open_read_only>`_ for details. This parameter exists for train as well as for val.
+    - ``deeplake_tag_id``: Optional parameter. If specified, the dataset will be checked out to the given tag. This parameter exists for train as well as for val. See the documentation for ``Dataset.commit_id``.
+    - ``deeplake_query``: Optional parameter. If specified and deeplake_path is not provided, the dataset is loaded from the query; if deeplake_path is also provided, the query is applied to that dataset.
+ - ``deeplake_tensors``: Optional parameter. If specified maps MMDetection tensors to the associated tensors in the dataset. MMDet tensors are: "img", "gt_bboxes", "gt_labels", "gt_masks". This parameter exists for train as well as for val.
+ - ``"img"``: Stands for image tensor.
+ - ``"gt_bboxes"``: Stands for bounding box tensor.
+ - ``"gt_labels"``: Stands for labels tensor.
+ - ``"gt_masks"``: Stands for masks tensor.
+
+ - ``deeplake_dataloader``: Optional parameter. If specified represents the parameters of the deeplake dataloader. Deeplake dataloader parameters are: "shuffle", "batch_size", "num_workers". This parameter exists for train as well as for val.
+ - ``"shuffle"``: If ``True`` shuffles the dataset.
+ - ``"batch_size"``: Size of batch. If not specified, dataloader will use ``samples_per_gpu``.
+ - ``"num_workers"``: Number of workers to use. If not specified, dataloader will use ``workers_per_gpu``.
+
+- ``deeplake_metrics_format``: Optional parameter. If specified, it represents the format of the deeplake metrics that will be used during evaluation. Defaults to COCO.
+  Available values are: "COCO", "PascalVOC". If the COCO format is used, you can specify whether you want to evaluate on bbox only or also on masks.
+  To do that, specify the format of the metric in the ``metric`` field.
+
+Example:
+
+>>> deeplake_metrics_format = "COCO"
+>>> evaluation = dict(metric=["bbox"], interval=1)
+
+- ``train_detector``: Function to train the MMDetection model.
+
+ Parameters:
+
+ - ``model``: MMDetection model that is going to be used.
+ - ``cfg``: mmcv.ConfigDict, Configuration of the model as well as of the datasets and transforms that's going to be used.
+    - ``ds_train``: Optional parameter. If provided, it will overwrite deeplake_path in train, and this dataset will be passed directly to the dataloader.
+    - ``ds_val``: Optional parameter. If provided, it will overwrite deeplake_path in val, and this dataset will be passed directly to the dataloader.
+ - ``ds_train_tensors``: Optional parameter. If provided will overwrite deeplake_tensors in train, and will pass this tensor mapping directly to dataloader.
+ - ``ds_val_tensors``: Optional parameter. If provided will overwrite deeplake_tensors in val, and will pass this tensor mapping directly to dataloader.
+    - ``distributed``: Optional parameter. If provided, training will run on all available gpus.
+    - ``meta``: Meta data used to build the runner.
+    - ``timestamp``: Variable used in runner to make .log and .log.json filenames the same.
+ - ``validate``: Bool, whether validation should be run, defaults to ``True``.
+
+NOTE:
+    ``gt_masks`` is an optional parameter: if you want to train a pure detector, it can be omitted. The other mappings are mandatory;
+    if you don't specify them explicitly, they are searched in the dataset according to tensor htype. It is better to specify them explicitly.
+
+MMDetection Config Examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Below is the example of the deeplake mmdet configuration:
+
+
+>>> _base_ = "../mmdetection/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py"
+>>> # use caffe img_norm
+>>> img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)
+>>> train_pipeline = [
+... dict(type='LoadImageFromFile'),
+... dict(type='LoadAnnotations', with_bbox=True),
+... dict(
+... type='Expand',
+... mean=img_norm_cfg['mean'],
+... to_rgb=img_norm_cfg['to_rgb'],
+... ratio_range=(1, 2)),
+... dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True),
+... dict(type='RandomFlip', flip_ratio=0.0),
+... dict(type='PhotoMetricDistortion'),
+... dict(type='Normalize', **img_norm_cfg),
+... dict(type='Pad', size_divisor=32),
+... dict(type='DefaultFormatBundle'),
+... dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+... ]
+>>> test_pipeline = [
+... dict(type='LoadImageFromFile'),
+... dict(
+... type='MultiScaleFlipAug',
+... img_scale=(416, 416),
+... flip=False,
+... transforms=[
+... dict(type='Resize', keep_ratio=True),
+... dict(type='RandomFlip', flip_ratio=0.0),
+... dict(type='Normalize', **img_norm_cfg),
+... dict(type='Pad', size_divisor=32),
+... dict(type='ImageToTensor', keys=['img']),
+... dict(type='Collect', keys=['img'])
+... ])
+... ]
+>>> #--------------------------------------DEEPLAKE INPUTS------------------------------------------------------------#
+>>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN"
+>>> data = dict(
+... # samples_per_gpu=4, # Is used instead of batch_size if deeplake_dataloader is not specified below
+... # workers_per_gpu=8, # Is used instead of num_workers if deeplake_dataloader is not specified below
+... train=dict(
+... pipeline=train_pipeline,
+...         # Credentials for authentication. See documentation for deeplake.open() for details
+... deeplake_path="al://activeloop/coco-train",
+... deeplake_credentials={
+... "token": TOKEN,
+... "creds": None,
+... },
+...         #OPTIONAL - Checkout the specified commit before training (read as deeplake_commit by the integration)
+...         deeplake_commit="",
+... #OPTIONAL - Loads a dataset tag for training based on tag_id
+... deeplake_tag_id="",
+... # OPTIONAL - {"mmdet_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMDET dictionary keys.
+... # If not specified, Deep Lake will auto-infer the mapping, but it might make mistakes if datasets have many tensors
+... deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories", "gt_masks": "masks},
+... # OPTIONAL - Parameters to use for the Deep Lake dataloader. If unspecified, the integration uses
+... # the parameters in other parts of the cfg file such as samples_per_gpu, and others.
+... deeplake_dataloader = {"shuffle": True, "batch_size": 4, 'num_workers': 8}
+... ),
+... # Parameters as the same as for train
+... val=dict(
+... pipeline=test_pipeline,
+... deeplake_path="al://activeloop/coco-val",
+... deeplake_credentials={
+... "token": TOKEN,
+... "creds": None,
+... },
+... deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"},
+... deeplake_dataloader = {"shuffle": False, "batch_size": 1, 'num_workers': 8}
+... ),
+... )
+>>> # Which metrics to use for evaluation. In MMDET (without Deeplake), this is inferred from the dataset type.
+>>> # In the Deep Lake integration, since the format is standardized, a variety of metrics can be used for a given dataset.
+>>> deeplake_metrics_format = "COCO"
+>>> #----------------------------------END DEEPLAKE INPUTS------------------------------------------------------------#
+
+And config for training:
+
+>>> import os
+>>> from mmcv import Config
+>>> import mmcv
+>>> from deeplake.integrations import mmdet as mmdet_deeplake
+>>> cfg = Config.fromfile(cfg_file)
+>>> cfg.model.bbox_head.num_classes = num_classes
+>>> # Build the detector
+>>> model = mmdet_deeplake.build_detector(cfg.model)
+>>> # Create work_dir
+>>> mmcv.mkdir_or_exist(os.path.abspath(cfg.work_dir))
+>>> # Run the training
+>>> mmdet_deeplake.train_detector(model, cfg, distributed=args.distributed, validate=args.validate)
+"""
+
+from collections import OrderedDict
+
+from typing import Callable, Optional, List, Dict, Sequence
+
+from functools import partial
+
+import os
+import math
+import types
+import torch
+import warnings
+import tempfile
+import numpy as np
+import os.path as osp
+
+from PIL import Image, ImageDraw # type: ignore
+
+from terminaltables import AsciiTable # type: ignore
+
+try:
+ from mmdet.apis.train import auto_scale_lr # type: ignore
+except Exception:
+ import mmdet # type: ignore
+
+ version = mmdet.__version__
+ raise Exception(
+ f"MMDet {version} version is not supported. The latest supported MMDet version with deeplake is 2.28.1."
+ )
+from mmdet.utils import ( # type: ignore
+ build_dp,
+ compat_cfg,
+ find_latest_checkpoint,
+ get_root_logger,
+)
+from mmdet.core import DistEvalHook, EvalHook # type: ignore
+from mmdet.core import build_optimizer
+
+from mmdet.datasets import replace_ImageToTensor # type: ignore
+
+from mmdet.datasets.builder import PIPELINES # type: ignore
+from mmdet.datasets.pipelines import Compose # type: ignore
+from mmdet.core import BitmapMasks # type: ignore
+from mmdet.core import eval_map, eval_recalls
+from mmdet.utils.util_distribution import * # type: ignore
+from mmdet.core import BitmapMasks, PolygonMasks
+
+import mmcv # type: ignore
+from mmcv.runner import init_dist # type: ignore
+from mmcv.parallel import collate # type: ignore
+from mmcv.utils import build_from_cfg, digit_version # type: ignore
+from mmcv.utils import print_log
+from mmcv.runner import ( # type: ignore
+ DistSamplerSeedHook,
+ EpochBasedRunner,
+ Fp16OptimizerHook,
+ OptimizerHook,
+ build_runner,
+ get_dist_info,
+)
+
+import deeplake as dp
+from deeplake.types import TypeKind
+from deeplake.integrations.mm.exceptions import ValidationDatasetMissingError
+
+from deeplake.integrations.mmdet.mmdet_dataset_ import (
+ MMDetTorchDataset,
+ MMDetDataset,
+ transform,
+)
+from deeplake.integrations.mm.ipc import _get_free_port
+from deeplake.integrations.mm.warnings import always_warn
+from deeplake.integrations.mm.get_indexes import get_indexes
+from deeplake.integrations.mm.upcast_array import upcast_array
+from deeplake.integrations.mm.worker_init_fn import worker_init_fn
+from deeplake.integrations.mm.mm_runners import DeeplakeIterBasedRunner
+from deeplake.integrations.mm.mm_common import (
+ load_ds_from_cfg,
+ get_collect_keys,
+ check_persistent_workers,
+ find_tensor_with_htype,
+ find_image_tensor,
+ ddp_setup,
+ force_cudnn_initialization,
+ check_unsupported_functionalities,
+ get_pipeline,
+)
+
+from torch.utils.data import DataLoader
+
+# Monkey-patch the function
+from deeplake.integrations.mmdet.test_ import single_gpu_test as custom_single_gpu_test
+from deeplake.integrations.mmdet.test_ import multi_gpu_test as custom_multi_gpu_test
+
+import mmdet.apis
+
+mmdet.apis.single_gpu_test = custom_single_gpu_test
+mmdet.apis.multi_gpu_test = custom_multi_gpu_test
+
+
+def build_ddp(model, device, *args, **kwargs):
+ """Build DistributedDataParallel module by device type.
+
+ If device is cuda, return a MMDistributedDataParallel model;
+ if device is mlu, return a MLUDistributedDataParallel model.
+
+ Args:
+ model (:class:`nn.Module`): module to be parallelized.
+ device (str): device type, mlu or cuda.
+ args (List): arguments to be passed to ddp_factory
+ kwargs (dict): keyword arguments to be passed to ddp_factory
+
+ Returns:
+ :class:`nn.Module`: the module to be parallelized
+
+ References:
+ .. [1] https://pytorch.org/docs/stable/generated/torch.nn.parallel.
+ DistributedDataParallel.html
+ """
+
+ assert device in ["cuda", "mlu"], "Only available for cuda or mlu devices."
+ if device == "cuda":
+ model = model.cuda(kwargs["device_ids"][0]) # patch
+ elif device == "mlu":
+ from mmcv.device.mlu import MLUDistributedDataParallel # type: ignore
+
+ ddp_factory["mlu"] = MLUDistributedDataParallel
+ model = model.mlu()
+
+ return ddp_factory[device](model, *args, **kwargs)
+
+
+def mmdet_subiterable_dataset_eval(
+ self,
+ *args,
+ **kwargs,
+):
+ return self.dataset.mmdet_dataset.evaluate(*args, **kwargs)
+
+
+def build_dataloader(
+ dataset: dp.Dataset,
+ images_tensor: str,
+ masks_tensor: Optional[str],
+ boxes_tensor: str,
+ labels_tensor: str,
+ pipeline: List,
+ mode: str = "train",
+ **loader_config,
+):
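+    """Build a PyTorch DataLoader that streams a Deep Lake dataset into MMDet.
+
+    A sketch of the flow: Deep Lake columns are mapped to MMDet keys via the
+    ``transform`` function, an ``MMDetDataset`` is attached to the loader for
+    metric evaluation, and in the distributed case each rank receives a
+    contiguous slice of the dataset computed by ``get_indexes``.
+    """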
+ poly2mask = False
+ if masks_tensor is not None:
+ if dataset.schema[masks_tensor].dtype.kind == TypeKind.Polygon:
+ poly2mask = True
+
+ bbox_info = dict(dataset[boxes_tensor].metadata)
+ classes = dataset[labels_tensor].metadata["class_names"]
+ pipeline = build_pipeline(pipeline)
+ metrics_format = loader_config.get("metrics_format")
+ persistent_workers = loader_config.get("persistent_workers", False)
+ dist = loader_config["dist"]
+ seed = loader_config["seed"]
+
+ transform_fn = partial(
+ transform,
+ images_tensor=images_tensor,
+ masks_tensor=masks_tensor,
+ boxes_tensor=boxes_tensor,
+ labels_tensor=labels_tensor,
+ pipeline=pipeline,
+ bbox_info=bbox_info,
+ poly2mask=poly2mask,
+ )
+
+ num_workers = loader_config.get("num_workers")
+ pin_memory = loader_config.get("pin_memory", False)
+ if num_workers is None:
+ num_workers = loader_config["workers_per_gpu"]
+
+ shuffle = loader_config.get("shuffle", True)
+ tensors_dict = {
+ "images_tensor": images_tensor,
+ "boxes_tensor": boxes_tensor,
+ "labels_tensor": labels_tensor,
+ }
+ tensors = [images_tensor, labels_tensor, boxes_tensor]
+ if masks_tensor is not None:
+ tensors.append(masks_tensor)
+ tensors_dict["masks_tensor"] = masks_tensor
+
+ batch_size = loader_config.get("batch_size")
+ drop_last = loader_config.get("drop_last", False)
+ if batch_size is None:
+ batch_size = loader_config["samples_per_gpu"]
+
+ collate_fn = partial(collate, samples_per_gpu=batch_size)
+
+ mmdet_ds = MMDetDataset(
+ dataset=dataset,
+ metrics_format=metrics_format,
+ pipeline=pipeline,
+ tensors_dict=tensors_dict,
+ tensors=tensors,
+ mode=mode,
+ bbox_info=bbox_info,
+ num_gpus=loader_config["num_gpus"],
+ batch_size=batch_size,
+ )
+
+    # get_dist_info() returns (0, 1) when not distributed; rank is also needed below for worker_init_fn
+    rank, world_size = get_dist_info()
+    if dist:
+ sl = get_indexes(
+ dataset, rank=rank, num_replicas=world_size, drop_last=drop_last
+ )
+ dataset = dataset.query(
+ f"select * LIMIT {sl.stop - sl.start} OFFSET {sl.start}"
+ )
+
+ pytorch_ds = MMDetTorchDataset(dataset, transform=transform_fn)
+
+ init_fn = (
+ partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
+ if seed is not None
+ else None
+ )
+
+ if digit_version(torch.__version__) >= digit_version("1.8.0"):
+ loader = DataLoader(
+ pytorch_ds,
+ batch_size=batch_size,
+ sampler=None,
+ num_workers=num_workers,
+ collate_fn=collate_fn,
+ pin_memory=pin_memory,
+ shuffle=shuffle,
+ worker_init_fn=init_fn,
+ drop_last=drop_last,
+ persistent_workers=persistent_workers,
+ )
+ else:
+ loader = DataLoader(
+ pytorch_ds,
+ batch_size=batch_size,
+ sampler=None,
+ num_workers=num_workers,
+ collate_fn=collate_fn,
+ pin_memory=pin_memory,
+ shuffle=shuffle,
+ worker_init_fn=init_fn,
+ drop_last=drop_last,
+ )
+
+ loader.dataset.mmdet_dataset = mmdet_ds
+ loader.dataset.pipeline = loader.dataset.mmdet_dataset.pipeline
+ eval_fn = partial(mmdet_subiterable_dataset_eval, loader)
+ loader.dataset.evaluate = eval_fn
+ loader.dataset.CLASSES = classes
+ return loader
+
+
+def build_pipeline(steps):
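+    """Compose an MMDet pipeline, skipping the file-loading steps (LoadImageFromFile,
+    LoadAnnotations) that the Deep Lake loader replaces."""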
+ return Compose(
+ [
+ build_from_cfg(step, PIPELINES, None)
+ for step in steps
+ if step["type"] not in {"LoadImageFromFile", "LoadAnnotations"}
+ ]
+ )
+
+
+def train_detector(
+ model,
+ cfg: mmcv.ConfigDict,
+ ds_train=None,
+ ds_train_tensors=None,
+ ds_val: Optional[dp.Dataset] = None,
+ ds_val_tensors=None,
+ distributed: bool = False,
+ timestamp=None,
+ meta=None,
+ validate: bool = True,
+):
+ """
+    Creates the runner, then trains and evaluates the model.
+ Args:
+ model: model to train, should be built before passing
+ cfg: mmcv.ConfigDict object containing all necessary configuration.
+ In cfg we have several changes to support deeplake integration:
+ _base_: still serves as a base model to inherit from
+ data: where everything related to data processing, you will need to specify the following parameters:
+ train: everything related to training data, it has the following attributes:
+ pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+ deeplake_tensors: dictionary that maps mmdet keys to deeplake dataset tensor. Example: `{"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}`.
+                    If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon".
+                    Keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels` and `gt_bboxes` are always required; if they are not specified, they
+                    are searched automatically. Masks are optional: if you include `gt_masks` in collect, you need to either specify it in the config or it will be searched based on the
+                    `segment_mask` and `polygon` htypes.
+                deeplake_credentials: dictionary with deeplake credentials that allow you to access the specified data. It has the following arguments: `token`.
+ `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+ val (Optional): everything related to validating data, it has the following attributes:
+ pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+ deeplake_tensors: dictionary that maps mmdet keys to deeplake dataset tensor. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}.
+                    If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon".
+                    Keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels` and `gt_bboxes` are always required; if they are not specified, they
+                    are searched automatically. Masks are optional: if you include `gt_masks` in collect, you need to either specify it in the config or it will be searched based on the
+                    `segment_mask` and `polygon` htypes.
+                deeplake_credentials: deeplake credentials that allow you to access the specified data. It has the following arguments: `token`.
+ `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+ test (Optional): everything related to testing data, it has the following attributes:
+ pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+ deeplake_tensors: dictionary that maps mmdet keys to deeplake dataset tensor. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}.
+                    If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon".
+                    Keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels` and `gt_bboxes` are always required; if they are not specified, they
+                    are searched automatically. Masks are optional: if you include `gt_masks` in collect, you need to either specify it in the config or it will be searched based on the
+                    `segment_mask` and `polygon` htypes.
+                deeplake_credentials: deeplake credentials that allow you to access the specified data. It has the following arguments: `token`.
+ `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+ samples_per_gpu: number of samples to be processed per gpu
+ workers_per_gpu: number of workers per gpu
+ optimizer: dictionary containing information about optimizer initialization
+ optimizer_config: some optimizer configuration that might be used during training like grad_clip etc.
+            runner: training type e.g. EpochBasedRunner, here you can specify the maximum number of epochs to be conducted. For instance: `runner = dict(type='EpochBasedRunner', max_epochs=273)`
+ ds_train: train dataset of type dp.Dataset. This can be a view of the dataset.
+ ds_train_tensors: dictionary that maps mmdet keys to deeplake dataset tensor. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}.
+            If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon".
+            Keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels` and `gt_bboxes` are always required; if they are not specified, they
+            are searched automatically. Masks are optional: if you include `gt_masks` in collect, you need to either specify it in the config or it will be searched based on the
+            `segment_mask` and `polygon` htypes.
+        ds_val: validation dataset of type dp.Dataset. This can be a view of the dataset.
+ ds_val_tensors: dictionary that maps mmdet keys to deeplake dataset tensor. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}.
+            If this dictionary is not specified, these tensors will be searched automatically using htypes like "image", "class_label", "bbox", "segment_mask" or "polygon".
+            Keys that need to be mapped are: `img`, `gt_labels`, `gt_bboxes`, `gt_masks`. `img`, `gt_labels` and `gt_bboxes` are always required; if they are not specified, they
+            are searched automatically. Masks are optional: if you include `gt_masks` in collect, you need to either specify it in the config or it will be searched based on the
+            `segment_mask` and `polygon` htypes.
+        evaluation: dictionary that contains all information needed for evaluation apart from data processing, like how often evaluation should be done and what metrics we want to use. In the deeplake
+            integration you also need to specify what kind of output you want to be printed during evaluation. For instance, `evaluation = dict(interval=1, metric=['bbox'], metrics_format="COCO")`
+ distributed: bool, whether ddp training should be started, by default `False`
+ timestamp: variable used in runner to make .log and .log.json filenames the same
+ meta: meta data used to build runner
+ validate: bool, whether validation should be conducted, by default `True`
+ """
+ check_unsupported_functionalities(cfg)
+
+ if not hasattr(cfg, "gpu_ids"):
+ cfg.gpu_ids = range(torch.cuda.device_count() if distributed else 1)
+ if distributed:
+ return torch.multiprocessing.spawn(
+ _train_detector,
+ args=(
+ model,
+ cfg,
+ ds_train,
+ ds_train_tensors,
+ ds_val,
+ ds_val_tensors,
+ distributed,
+ timestamp,
+ meta,
+ validate,
+ _get_free_port(),
+ ),
+ nprocs=len(cfg.gpu_ids),
+ )
+ _train_detector(
+ 0,
+ model,
+ cfg,
+ ds_train,
+ ds_train_tensors,
+ ds_val,
+ ds_val_tensors,
+ distributed,
+ timestamp,
+ meta,
+ validate,
+ )
+
+
+def _train_detector(
+ local_rank,
+ model,
+ cfg: mmcv.ConfigDict,
+ ds_train=None,
+ ds_train_tensors=None,
+ ds_val: Optional[dp.Dataset] = None,
+ ds_val_tensors=None,
+ distributed: bool = False,
+ timestamp=None,
+ meta=None,
+ validate: bool = True,
+ port=None,
+):
+ batch_size = cfg.data.get("samples_per_gpu", 256)
+ num_workers = cfg.data.get("workers_per_gpu", 1)
+
+ if ds_train is None:
+ ds_train = load_ds_from_cfg(cfg.data.train)
+ ds_train_tensors = cfg.data.train.get("deeplake_tensors", {})
+ else:
+ cfg_data = cfg.data.train.get("deeplake_path")
+ if cfg_data:
+ always_warn(
+ "A Deep Lake dataset was specified in the cfg as well as inthe dataset input to train_detector. The dataset input to train_detector will be used in the workflow."
+ )
+
+ eval_cfg = cfg.get("evaluation", {})
+ if ds_train_tensors:
+ train_images_tensor = ds_train_tensors["img"]
+ train_boxes_tensor = ds_train_tensors["gt_bboxes"]
+ train_labels_tensor = ds_train_tensors["gt_labels"]
+ train_masks_tensor = ds_train_tensors.get("gt_masks")
+ else:
+ train_images_tensor = find_image_tensor(ds_train, mm_class="img")
+ train_boxes_tensor = find_tensor_with_htype(
+ ds_train, type_kind=TypeKind.BoundingBox, mm_class="gt_bboxes"
+ )
+ train_labels_tensor = find_tensor_with_htype(
+ ds_train, type_kind=TypeKind.ClassLabel, mm_class="train gt_labels"
+ )
+ train_masks_tensor = None
+
+ collection_keys = get_collect_keys(cfg)
+ if "gt_masks" in collection_keys:
+ train_masks_tensor = find_tensor_with_htype(
+ ds_train, type_kind=TypeKind.BinaryMask, mm_class="gt_masks"
+ ) or find_tensor_with_htype(
+ ds_train, type_kind=TypeKind.Polygon, mm_class="gt_masks"
+ )
+
+ # TODO verify required tensors are not None and raise Exception.
+ if hasattr(model, "CLASSES"):
+ warnings.warn(
+ "model already has a CLASSES attribute. dataset.info.class_names will not be used."
+ )
+ elif "class_names" in dict(ds_train[train_labels_tensor].metadata):
+ model.CLASSES = ds_train[train_labels_tensor].metadata["class_names"]
+
+ metrics_format = cfg.get("deeplake_metrics_format", "COCO")
+
+ logger = get_root_logger(log_level=cfg.log_level)
+
+ runner_type = "EpochBasedRunner" if "runner" not in cfg else cfg.runner["type"]
+
+ train_dataloader_default_args = dict(
+ samples_per_gpu=batch_size,
+ workers_per_gpu=num_workers,
+ # `num_gpus` will be ignored if distributed
+ num_gpus=len(cfg.gpu_ids),
+ dist=distributed,
+ seed=cfg.seed,
+ runner_type=runner_type,
+ metrics_format=metrics_format,
+ )
+
+ train_loader_cfg = {
+ **train_dataloader_default_args,
+ **cfg.data.get("train_dataloader", {}),
+ **cfg.data.train.get("deeplake_dataloader", {}),
+ }
+
+ # put model on gpus
+ if distributed:
+ find_unused_parameters = cfg.get("find_unused_parameters", False)
+ # Sets the `find_unused_parameters` parameter in
+ # # torch.nn.parallel.DistributedDataParallel
+ # model = torch.nn.parallel.DistributedDataParallel(model.cuda(),
+ # device_ids=[local_rank],
+ # output_device=local_rank,
+ # broadcast_buffers=False,
+ # find_unused_parameters=find_unused_parameters)
+ force_cudnn_initialization(cfg.gpu_ids[local_rank])
+ ddp_setup(local_rank, len(cfg.gpu_ids), port)
+ model = build_ddp(
+ model,
+ cfg.device,
+ device_ids=[cfg.gpu_ids[local_rank]],
+ broadcast_buffers=False,
+ find_unused_parameters=find_unused_parameters,
+ )
+ else:
+ model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)
+
+ train_pipeline = get_pipeline(cfg, name="train", generic_name="train_pipeline")
+
+ data_loader = build_dataloader(
+        ds_train,  # TODO: convert to a for loop if we support concatenating several datasets
+ train_images_tensor,
+ train_masks_tensor,
+ train_boxes_tensor,
+ train_labels_tensor,
+ pipeline=train_pipeline,
+ **train_loader_cfg,
+ )
+ # build optimizer
+ auto_scale_lr(cfg, distributed, logger)
+ optimizer = build_optimizer(model, cfg.optimizer)
+
+ cfg.custom_imports = dict(
+ imports=["deeplake.integrations.mm.mm_runners"],
+ allow_failed_imports=False,
+ )
+ if cfg.runner.type == "IterBasedRunner":
+ cfg.runner.type = "DeeplakeIterBasedRunner"
+ elif cfg.runner.type == "EpochBasedRunner":
+ cfg.runner.type = "DeeplakeEpochBasedRunner"
+
+ runner = build_runner(
+ cfg.runner,
+ default_args=dict(
+ model=model,
+ optimizer=optimizer,
+ work_dir=cfg.work_dir,
+ logger=logger,
+ meta=meta,
+ force_cleanup=True,
+ ),
+ )
+
+ # an ugly workaround to make .log and .log.json filenames the same
+ runner.timestamp = timestamp
+
+ # fp16 setting
+ fp16_cfg = cfg.get("fp16", None)
+ if fp16_cfg is not None:
+ optimizer_config = Fp16OptimizerHook(
+ **cfg.optimizer_config, **fp16_cfg, distributed=distributed
+ )
+ elif distributed and "type" not in cfg.optimizer_config:
+ optimizer_config = OptimizerHook(**cfg.optimizer_config)
+ else:
+ optimizer_config = cfg.optimizer_config
+
+ # register hooks
+ runner.register_training_hooks(
+ cfg.lr_config,
+ optimizer_config,
+ cfg.checkpoint_config,
+ cfg.log_config,
+ cfg.get("momentum_config", None),
+ custom_hooks_config=cfg.get("custom_hooks", None),
+ )
+
+ if distributed:
+ if isinstance(runner, EpochBasedRunner):
+ runner.register_hook(DistSamplerSeedHook())
+
+ # register eval hooks
+ if validate:
+ val_dataloader_default_args = dict(
+ samples_per_gpu=batch_size,
+ workers_per_gpu=num_workers,
+ dist=distributed,
+ seed=cfg.seed,
+ shuffle=False,
+ mode="val",
+ metrics_format=metrics_format,
+ num_gpus=len(cfg.gpu_ids),
+ )
+
+ val_dataloader_args = {
+ **cfg.data.val.get("deeplake_dataloader", {}),
+ **val_dataloader_default_args,
+ }
+
+ train_persistent_workers = train_loader_cfg.get("persistent_workers", False)
+ val_persistent_workers = val_dataloader_args.get("persistent_workers", False)
+ check_persistent_workers(train_persistent_workers, val_persistent_workers)
+
+ if val_dataloader_args.get("shuffle", False):
+ always_warn("shuffle argument for validation dataset will be ignored.")
+
+ if ds_val is None:
+ cfg_ds_val = cfg.data.get("val")
+ if cfg_ds_val is None or not any(
+ cfg_ds_val.get(key) is not None
+ for key in ["deeplake_path", "deeplake_query"]
+ ):
+ raise ValidationDatasetMissingError()
+
+ ds_val = load_ds_from_cfg(cfg.data.val)
+ ds_val_tensors = cfg.data.val.get("deeplake_tensors", {})
+ else:
+ cfg_data = cfg.data.val.get("deeplake_path")
+ if cfg_data is not None:
+ always_warn(
+ "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_detector. The dataset input to train_detector will be used in the workflow."
+ )
+
+ if ds_val is None:
+ raise ValidationDatasetMissingError()
+
+ if val_dataloader_args["samples_per_gpu"] > 1:
+ # Replace 'ImageToTensor' to 'DefaultFormatBundle'
+ cfg.data.val.pipeline = replace_ImageToTensor(cfg.data.val.pipeline)
+
+ if ds_val_tensors:
+ val_images_tensor = ds_val_tensors["img"]
+ val_boxes_tensor = ds_val_tensors["gt_bboxes"]
+ val_labels_tensor = ds_val_tensors["gt_labels"]
+ val_masks_tensor = ds_val_tensors.get("gt_masks")
+ else:
+ val_images_tensor = find_image_tensor(ds_val, mm_class="img")
+ val_boxes_tensor = find_tensor_with_htype(
+ ds_val, type_kind=TypeKind.BoundingBox, mm_class="gt_bboxes"
+ )
+ val_labels_tensor = find_tensor_with_htype(
+ ds_val, type_kind=TypeKind.ClassLabel, mm_class="gt_labels"
+ )
+ val_masks_tensor = None
+
+ collection_keys = get_collect_keys(cfg)
+ if "gt_masks" in collection_keys:
+ val_masks_tensor = find_tensor_with_htype(
+ ds_val, type_kind=TypeKind.BinaryMask, mm_class="gt_masks"
+ ) or find_tensor_with_htype(
+ ds_val, type_kind=TypeKind.Polygon, mm_class="gt_masks"
+ )
+
+ # TODO make sure required tensors are not None.
+ val_pipeline = get_pipeline(cfg, name="val", generic_name="test_pipeline")
+
+ val_dataloader = build_dataloader(
+ ds_val,
+ val_images_tensor,
+ val_masks_tensor,
+ val_boxes_tensor,
+ val_labels_tensor,
+ pipeline=val_pipeline,
+ **val_dataloader_args,
+ )
+
+ eval_cfg["by_epoch"] = cfg.runner["type"] != "DeeplakeIterBasedRunner"
+ eval_hook = EvalHook
+ if distributed:
+ eval_hook = DistEvalHook
+ # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
+ # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority="LOW")
+
+ resume_from = None
+ if cfg.resume_from is None and cfg.get("auto_resume"):
+ resume_from = find_latest_checkpoint(cfg.work_dir)
+ if resume_from is not None:
+ cfg.resume_from = resume_from
+
+ if cfg.resume_from:
+ runner.resume(cfg.resume_from)
+ elif cfg.load_from:
+ runner.load_checkpoint(cfg.load_from)
+ runner.run([data_loader], cfg.workflow)
diff --git a/python/deeplake/integrations/mmdet/mmdet_dataset_.py b/python/deeplake/integrations/mmdet/mmdet_dataset_.py
new file mode 100644
index 0000000000..eaa4183976
--- /dev/null
+++ b/python/deeplake/integrations/mmdet/mmdet_dataset_.py
@@ -0,0 +1,823 @@
+from collections import OrderedDict
+from typing import Callable, Optional, List, Dict, Sequence
+
+import os
+import math
+import types
+import torch
+import warnings
+import tempfile
+import numpy as np
+import os.path as osp
+
+from PIL import Image, ImageDraw # type: ignore
+
+from terminaltables import AsciiTable # type: ignore
+
+try:
+ from mmdet.apis.train import auto_scale_lr # type: ignore
+except Exception:
+ import mmdet # type: ignore
+
+ version = mmdet.__version__
+ raise Exception(
+ f"MMDet {version} version is not supported. The latest supported MMDet version with deeplake is 2.28.1."
+ )
+
+from mmdet.core import eval_map, eval_recalls
+from mmdet.core import BitmapMasks, PolygonMasks
+
+import mmcv # type: ignore
+from mmcv.utils import print_log
+
+import deeplake as dp
+from deeplake.types import TypeKind
+
+from deeplake.integrations.mm.upcast_array import upcast_array
+from deeplake.integrations.mm.warnings import always_warn
+from deeplake.integrations.mmdet import mmdet_utils_
+
+from torch.utils.data import DataLoader
+
+# Monkey-patch the function
+from deeplake.integrations.mm.exceptions import InvalidImageError
+from deeplake.integrations.mmdet.test_ import single_gpu_test as custom_single_gpu_test
+from deeplake.integrations.mmdet.test_ import multi_gpu_test as custom_multi_gpu_test
+
+from torch.utils.data import Dataset
+
+
+def coco_pixel_2_pascal_pixel(boxes, shape):
+ """
+ Converts bounding boxes from COCO pixel format (x, y, width, height)
+ to Pascal VOC pixel format (x_min, y_min, x_max, y_max).
+
+ Clipping ensures the bounding boxes have non-negative width and height.
+
+ @param boxes: numpy array of shape (N, 4), containing bounding boxes in COCO format.
+ @param shape: tuple, the shape of the image (height, width).
+
+ @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format.
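+
+    Example (illustrative values):
+        >>> coco_pixel_2_pascal_pixel(np.array([[10, 20, 30, 40]]), (100, 100))
+        array([[10, 20, 40, 60]])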
+ """
+ pascal_boxes = np.empty((0, 4), dtype=boxes.dtype)
+ if boxes.size != 0:
+ pascal_boxes = np.stack(
+ (
+ boxes[:, 0],
+ boxes[:, 1],
+ boxes[:, 0] + boxes[:, 2],
+ boxes[:, 1] + boxes[:, 3],
+ ),
+ axis=1,
+ )
+ return pascal_boxes
+
+
+def poly_2_mask(polygons, shape):
+    # TODO: this doesn't fill the array in place; out = np.zeros(shape + (len(polygons),), dtype=np.uint8)
+ """
+ Converts a list of polygons into a binary mask.
+
+ @param polygons: list of polygons, where each polygon is a list of (x, y) coordinates.
+ @param shape: tuple, the shape of the mask (height, width).
+
+ @return: numpy array, binary mask of the same size as the image.
+ """
+ out = np.zeros(shape + (len(polygons),), dtype=np.uint8)
+ for i, polygon in enumerate(polygons):
+ im = Image.fromarray(out[..., i])
+ d = ImageDraw.Draw(im)
+ d.polygon(polygon, fill=1)
+ out[..., i] = np.asarray(im)
+ return out
+
+
+def coco_frac_2_pascal_pixel(boxes, shape):
+ """
+ Converts bounding boxes from fractional COCO format (relative to image size)
+ to Pascal VOC pixel format.
+
+ @param boxes: numpy array of shape (N, 4), bounding boxes in fractional COCO format.
+ @param shape: tuple, the shape of the image (height, width).
+
+ @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format.
+ """
+ bbox = np.empty((0, 4), dtype=boxes.dtype)
+ if boxes.size != 0:
+ x = boxes[:, 0] * shape[1]
+ y = boxes[:, 1] * shape[0]
+ w = boxes[:, 2] * shape[1]
+ h = boxes[:, 3] * shape[0]
+ bbox = np.stack((x, y, w, h), axis=1)
+ return coco_pixel_2_pascal_pixel(bbox, shape)
+
+
+def pascal_frac_2_pascal_pixel(boxes, shape):
+ """
+ Converts bounding boxes from fractional Pascal VOC format (LTRB)
+ to pixel Pascal VOC format.
+
+ @param boxes: numpy array of shape (N, 4), bounding boxes in fractional format.
+ @param shape: tuple, the shape of the image (height, width).
+
+ @return: numpy array of shape (N, 4), bounding boxes in pixel format.
+ """
+ bbox = np.empty((0, 4), dtype=boxes.dtype)
+ if boxes.size != 0:
+ x_top = boxes[:, 0] * shape[1]
+ y_top = boxes[:, 1] * shape[0]
+ x_bottom = boxes[:, 2] * shape[1]
+ y_bottom = boxes[:, 3] * shape[0]
+ bbox = np.stack((x_top, y_top, x_bottom, y_bottom), axis=1)
+ return bbox
+
+
+def yolo_pixel_2_pascal_pixel(boxes, shape):
+ """
+ Converts bounding boxes from YOLO pixel format (center_x, center_y, width, height)
+ to Pascal VOC pixel format (LTRB).
+
+ @param boxes: numpy array of shape (N, 4), bounding boxes in YOLO format.
+ @param shape: tuple, the shape of the image (height, width).
+
+ @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format.
+ """
+ bbox = np.empty((0, 4), dtype=boxes.dtype)
+ if boxes.size != 0:
+ x_top = np.array(boxes[:, 0]) - np.floor(np.array(boxes[:, 2]) / 2)
+ y_top = np.array(boxes[:, 1]) - np.floor(np.array(boxes[:, 3]) / 2)
+ x_bottom = np.array(boxes[:, 0]) + np.floor(np.array(boxes[:, 2]) / 2)
+ y_bottom = np.array(boxes[:, 1]) + np.floor(np.array(boxes[:, 3]) / 2)
+ bbox = np.stack((x_top, y_top, x_bottom, y_bottom), axis=1)
+ return bbox
+
+
+def yolo_frac_2_pascal_pixel(boxes, shape):
+ """
+ Converts bounding boxes from YOLO fractional format to Pascal VOC pixel format.
+
+ @param boxes: numpy array of shape (N, 4), bounding boxes in YOLO fractional format.
+ @param shape: tuple, the shape of the image (height, width).
+
+ @return: numpy array of shape (N, 4), bounding boxes in Pascal VOC format.
+ """
+ bbox = np.empty((0, 4), dtype=boxes.dtype)
+ if boxes.size != 0:
+ x_center = boxes[:, 0] * shape[1]
+ y_center = boxes[:, 1] * shape[0]
+ width = boxes[:, 2] * shape[1]
+ height = boxes[:, 3] * shape[0]
+ bbox = np.stack((x_center, y_center, width, height), axis=1)
+ return yolo_pixel_2_pascal_pixel(bbox, shape)
+
+
+def get_bbox_format(bbox, bbox_info):
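+    """
+    Infer the (mode, type) bounding-box format from the column's metadata.
+
+    Falls back to a heuristic when no coords metadata is present: if the mean
+    coordinate value is below 1, the boxes are assumed to be fractional CCWH
+    (YOLO-style).
+
+    Example (illustrative):
+        >>> get_bbox_format(np.array([[0.5, 0.5, 0.2, 0.2]]), {"coords": {}})
+        ('CCWH', 'fractional')
+    """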
+ bbox_info = bbox_info.get("coords")
+ if not bbox_info:
+ bbox_info = {}
+ mode = bbox_info.get("mode", "LTWH")
+    bbox_type = bbox_info.get("type", "pixel")
+
+    if len(bbox_info) == 0 and np.mean(bbox) < 1:
+        mode = "CCWH"
+        bbox_type = "fractional"
+    return (mode, bbox_type)
+
+
+BBOX_FORMAT_TO_PASCAL_CONVERTER = {
+ ("LTWH", "pixel"): coco_pixel_2_pascal_pixel,
+ ("LTWH", "fractional"): coco_frac_2_pascal_pixel,
+ ("LTRB", "pixel"): lambda x, y: x,
+ ("LTRB", "fractional"): pascal_frac_2_pascal_pixel,
+ ("CCWH", "pixel"): yolo_pixel_2_pascal_pixel,
+ ("CCWH", "fractional"): yolo_frac_2_pascal_pixel,
+}
+
+
+def convert_to_pascal_format(bbox, bbox_info, shape):
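+    """
+    Convert bounding boxes of any supported (mode, type) format to Pascal VOC
+    pixel format, dispatching through BBOX_FORMAT_TO_PASCAL_CONVERTER.
+
+    Example (illustrative; COCO pixel boxes):
+        >>> info = {"coords": {"mode": "LTWH", "type": "pixel"}}
+        >>> convert_to_pascal_format(np.array([[10, 20, 30, 40]]), info, (100, 200)).tolist()
+        [[10, 20, 40, 60]]
+    """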
+ bbox_format = get_bbox_format(bbox, bbox_info)
+ converter = BBOX_FORMAT_TO_PASCAL_CONVERTER[bbox_format]
+ return converter(bbox, shape)
+
+
+def pascal_pixel_2_coco_pixel(boxes, images):
+ """
+ Converts bounding boxes from Pascal VOC pixel format (LTRB)
+ to COCO pixel format (x, y, width, height).
+
+    @param boxes: list of numpy arrays of shape (N, 4), bounding boxes in Pascal VOC format.
+    @param images: list of images; unused here, kept for a uniform converter signature.
+
+    @return: list of numpy arrays of shape (N, 4), bounding boxes in COCO pixel format.
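+
+    Example (illustrative; `images` is unused, so None is passed):
+        >>> pascal_pixel_2_coco_pixel([np.array([[10, 20, 40, 60]])], None)[0].tolist()
+        [[10, 20, 30, 40]]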
+ """
+    coco_boxes = []
+    for box in boxes:
+        if box.size != 0:
+            coco_boxes.append(
+                np.stack(
+                    (
+                        box[:, 0],
+                        box[:, 1],
+                        box[:, 2] - box[:, 0],
+                        box[:, 3] - box[:, 1],
+                    ),
+                    axis=1,
+                )
+            )
+        else:
+            coco_boxes.append(box)
+    return coco_boxes
+
+
+def pascal_frac_2_coco_pixel(boxes, images):
+    pascal_pixel_boxes = []
+    for i, box in enumerate(boxes):
+        if box.size != 0:
+            shape = images[i].shape
+            x_top = box[:, 0] * shape[1]
+            y_top = box[:, 1] * shape[0]
+            x_bottom = box[:, 2] * shape[1]
+            y_bottom = box[:, 3] * shape[0]
+            bbox = np.stack((x_top, y_top, x_bottom, y_bottom), axis=1)
+            pascal_pixel_boxes.append(bbox)
+        else:
+            # Keep empty entries so the list stays aligned with images/labels.
+            pascal_pixel_boxes.append(box)
+ return pascal_pixel_2_coco_pixel(pascal_pixel_boxes, images)
+
+
+def yolo_pixel_2_coco_pixel(boxes, images):
+    yolo_boxes = []
+    for box in boxes:
+        if box.size != 0:
+            x_top = np.array(box[:, 0]) - np.floor(np.array(box[:, 2]) / 2)
+            y_top = np.array(box[:, 1]) - np.floor(np.array(box[:, 3]) / 2)
+            w = box[:, 2]
+            h = box[:, 3]
+            bbox = np.stack([x_top, y_top, w, h], axis=1)
+            yolo_boxes.append(bbox)
+        else:
+            # Keep empty entries so the list stays aligned with images/labels.
+            yolo_boxes.append(box)
+    return yolo_boxes
+
+
+def yolo_frac_2_coco_pixel(boxes, images):
+    yolo_boxes = []
+    for i, box in enumerate(boxes):
+        if box.size != 0:
+            shape = images[i].shape
+            x_center = box[:, 0] * shape[1]
+            y_center = box[:, 1] * shape[0]
+            width = box[:, 2] * shape[1]
+            height = box[:, 3] * shape[0]
+            bbox = np.stack((x_center, y_center, width, height), axis=1)
+            yolo_boxes.append(bbox)
+        else:
+            # Guard against empty boxes, mirroring the other converters.
+            yolo_boxes.append(box)
+ return yolo_pixel_2_coco_pixel(yolo_boxes, images)
+
+
+def coco_frac_2_coco_pixel(boxes, images):
+ coco_pixel_boxes = []
+ for i, box in enumerate(boxes):
+ shape = images[i].shape
+ x = box[:, 0] * shape[1]
+ y = box[:, 1] * shape[0]
+ w = box[:, 2] * shape[1]
+ h = box[:, 3] * shape[0]
+ bbox = np.stack((x, y, w, h), axis=1)
+ coco_pixel_boxes.append(bbox)
+ return np.array(coco_pixel_boxes)
+
+
+BBOX_FORMAT_TO_COCO_CONVERTER = {
+ ("LTWH", "pixel"): lambda x, y: x,
+ ("LTWH", "fractional"): coco_frac_2_coco_pixel,
+ ("LTRB", "pixel"): pascal_pixel_2_coco_pixel,
+ ("LTRB", "fractional"): pascal_frac_2_coco_pixel,
+ ("CCWH", "pixel"): yolo_pixel_2_coco_pixel,
+ ("CCWH", "fractional"): yolo_frac_2_coco_pixel,
+}
+
+
+def convert_to_coco_format(bbox, bbox_format, images):
+ converter = BBOX_FORMAT_TO_COCO_CONVERTER[bbox_format]
+ return converter(bbox, images)
+
+
+def first_non_empty(bboxes):
+    for box in bboxes:
+        if len(box):
+            return box
+    raise ValueError("all bounding box arrays are empty")
+
+
+def transform(
+ sample_in,
+ images_tensor: str,
+ masks_tensor: str,
+ boxes_tensor: str,
+ labels_tensor: str,
+ pipeline: Callable,
+ bbox_info: str,
+ poly2mask: bool,
+):
+ img = upcast_array(sample_in[images_tensor])
+ if not isinstance(img, np.ndarray):
+ img = np.array(img)
+
+ bboxes = upcast_array(sample_in[boxes_tensor])
+    # TODO: the bbox format should be recognized outside the transform, not on a per-sample basis.
+ bboxes = convert_to_pascal_format(bboxes, bbox_info, img.shape)
+    if bboxes.shape == (0, 0):  # TODO: remove once the bug is fixed
+ bboxes = np.empty((0, 4), dtype=sample_in[boxes_tensor].dtype)
+
+ labels = upcast_array(sample_in[labels_tensor])
+
+ if img.ndim == 2:
+ img = np.expand_dims(img, -1)
+
+ img = img[..., ::-1] # rgb_to_bgr should be optional
+ if img.shape[2] == 1:
+ img = np.repeat(img, 3, axis=2)
+ shape = img.shape
+
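+    # Assemble the input dict in the layout MMDet pipelines expect: the image
+    # plus ground-truth boxes/labels, and the *_fields lists that tell each
+    # transform which keys to operate on.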
+ pipeline_dict = {
+ "img": np.ascontiguousarray(img, dtype=np.float32),
+ "img_fields": ["img"],
+ "filename": None,
+ "ori_filename": None,
+ "img_shape": shape,
+ "ori_shape": shape,
+ "gt_bboxes": bboxes,
+ "gt_labels": labels,
+ "bbox_fields": ["gt_bboxes"],
+ }
+
+ if masks_tensor:
+ masks = upcast_array(sample_in[masks_tensor])
+ if poly2mask:
+ masks = mmdet_utils_.convert_poly_to_coco_format(masks)
+ masks = PolygonMasks(
+ [process_polygons(polygons) for polygons in masks], shape[0], shape[1]
+ )
+ else:
+ masks = BitmapMasks(masks.astype(np.uint8).transpose(2, 0, 1), *shape[:2])
+
+ pipeline_dict["gt_masks"] = masks
+ pipeline_dict["mask_fields"] = ["gt_masks"]
+ return pipeline(pipeline_dict)
+
+
+def process_polygons(polygons):
+ """Convert polygons to list of ndarray and filter invalid polygons.
+
+ Args:
+ polygons (list[list]): Polygons of one instance.
+
+ Returns:
+ list[numpy.ndarray]: Processed polygons.
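+
+    Example (illustrative; the 2-point polygon is dropped as invalid):
+        >>> [p.tolist() for p in process_polygons([[0, 0, 1, 0, 1, 1], [0, 0, 1, 0]])]
+        [[0, 0, 1, 0, 1, 1]]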
+ """
+
+ polygons = [np.array(p) for p in polygons]
+ valid_polygons = []
+ for polygon in polygons:
+ if len(polygon) % 2 == 0 and len(polygon) >= 6:
+ valid_polygons.append(polygon)
+ return valid_polygons
+
+
+class MMDetTorchDataset(Dataset):
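+    """Thin ``torch.utils.data.Dataset`` wrapper around a Deep Lake dataset.
+
+    Applies an optional per-sample transform and substitutes a known-good
+    sample when an image fails to decode (see ``__getitem__``).
+    """
+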
+ def __init__(
+ self,
+ dataset,
+ tensors: Optional[Sequence[str]] = None,
+ transform: Optional[Callable] = None,
+ ) -> None:
+ super().__init__()
+ self.dataset = dataset
+ self.transform = transform
+ self.column_names = [col.name for col in self.dataset.schema.columns]
+ self.last_successful_index = -1
+
+ def __getstate__(self):
+ return {
+ "dataset": self.dataset,
+ "transform": self.transform,
+ "column_names": self.column_names,
+ "last_successful_index": self.last_successful_index,
+ }
+
+ def __setstate__(self, state):
+ """Restore state from pickled state."""
+ if hasattr(super(), "__setstate__"):
+ super().__setstate__(state)
+
+ self.dataset = state["dataset"]
+ self.transform = state["transform"]
+ self.column_names = state["column_names"]
+ self.last_successful_index = state["last_successful_index"]
+
+ def __len__(self):
+ return len(self.dataset)
+
+    def __getitem__(self, idx):
+        while True:
+            try:
+                sample = self.dataset[idx]
+                if self.transform:
+                    out = self.transform(sample)
+                else:
+                    out = {col: sample[col] for col in self.column_names}
+                # Record the index so failed reads can fall back to a known-good sample.
+                self.last_successful_index = idx
+                return out
+            except InvalidImageError as e:
+                print(f"Error processing data at index {idx}: {e}")
+                if self.last_successful_index == -1:
+                    # No good sample seen yet: advance to the next index
+                    # instead of retrying the same failing one forever.
+                    idx += 1
+                else:
+                    idx = self.last_successful_index
+                continue
+
+
+class MMDetDataset(MMDetTorchDataset):
+ def __init__(
+ self,
+ *args,
+ tensors_dict=None,
+ mode="train",
+ metrics_format="COCO",
+ bbox_info=None,
+ pipeline=None,
+ num_gpus=1,
+ batch_size=1,
+ **kwargs,
+ ):
+ super().__init__(*args, **kwargs)
+ self.mode = mode
+ self.pipeline = pipeline
+ self.num_gpus = num_gpus
+ self.batch_size = batch_size
+ self.tensors_dict = tensors_dict
+ self.bbox_info = bbox_info
+ if self.mode in ("val", "test"):
+ self.images = self._get_images(self.tensors_dict["images_tensor"])
+ masks = self._get_masks(self.tensors_dict.get("masks_tensor", None))
+ masks_type_kind = (
+ self.dataset.schema[masks.name].dtype.kind
+ if masks is not None and masks != []
+ else None
+ )
+ self.masks_type_kind = masks_type_kind
+ self.masks = masks[:]
+ self.bboxes = self._get_bboxes(self.tensors_dict["boxes_tensor"])
+ bbox_format = get_bbox_format(first_non_empty(self.bboxes), bbox_info)
+ self.labels = self._get_labels(self.tensors_dict["labels_tensor"])
+ self.iscrowds = self._get_iscrowds(self.tensors_dict.get("iscrowds"))
+ self.CLASSES = self.get_classes(self.tensors_dict["labels_tensor"])
+ self.metrics_format = metrics_format
+ coco_style_bbox = convert_to_coco_format(
+ self.bboxes, bbox_format, self.images
+ )
+
+ if self.metrics_format == "COCO":
+ self.evaluator = mmdet_utils_.COCODatasetEvaluater(
+ pipeline,
+ classes=self.CLASSES,
+ deeplake_dataset=self.dataset,
+ imgs=self.images,
+ masks=self.masks,
+ masks_type_kind=self.masks_type_kind,
+ bboxes=coco_style_bbox,
+ labels=self.labels,
+ iscrowds=self.iscrowds,
+ bbox_format=bbox_format,
+ num_gpus=num_gpus,
+ )
+ else:
+ self.evaluator = None
+
+    def __getstate__(self):
+        """Prepare state for pickling."""
+        state = super().__getstate__() if hasattr(super(), "__getstate__") else {}
+
+        state.update(
+            {
+                "mode": self.mode,
+                "pipeline": self.pipeline,
+                "num_gpus": self.num_gpus,
+                "batch_size": self.batch_size,
+                "tensors_dict": self.tensors_dict,
+                "bbox_info": self.bbox_info,
+                # metrics_format is only set on the instance in val/test
+                # mode, so fall back to the constructor default here.
+                "metrics_format": getattr(self, "metrics_format", "COCO"),
+            }
+        )
+        return state
+
+ def __setstate__(self, state):
+ """Restore state from pickled state."""
+ if hasattr(super(), "__setstate__"):
+ super().__setstate__(state)
+
+ self.mode = state["mode"]
+ self.pipeline = state["pipeline"]
+ self.num_gpus = state["num_gpus"]
+ self.batch_size = state["batch_size"]
+ self.tensors_dict = state["tensors_dict"]
+ self.bbox_info = state["bbox_info"]
+
+ if self.mode in ("val", "test"):
+ self.images = self._get_images(self.tensors_dict["images_tensor"])
+ masks = self._get_masks(self.tensors_dict.get("masks_tensor", None))
+ masks_type_kind = (
+ self.dataset.schema[masks.name].dtype.kind
+ if masks is not None and masks != []
+ else None
+ )
+ self.masks_type_kind = masks_type_kind
+ self.masks = masks[:]
+ self.bboxes = self._get_bboxes(self.tensors_dict["boxes_tensor"])
+            bbox_format = get_bbox_format(
+                first_non_empty(self.bboxes), self.bbox_info
+            )
+ self.labels = self._get_labels(self.tensors_dict["labels_tensor"])
+ self.iscrowds = self._get_iscrowds(self.tensors_dict.get("iscrowds"))
+ self.CLASSES = self.get_classes(self.tensors_dict["labels_tensor"])
+            self.metrics_format = state.get("metrics_format", "COCO")
+ coco_style_bbox = convert_to_coco_format(
+ self.bboxes, bbox_format, self.images
+ )
+
+ if self.metrics_format == "COCO":
+ self.evaluator = mmdet_utils_.COCODatasetEvaluater(
+                    self.pipeline,
+ classes=self.CLASSES,
+ deeplake_dataset=self.dataset,
+ imgs=self.images,
+ masks=self.masks,
+ masks_type_kind=self.masks_type_kind,
+ bboxes=coco_style_bbox,
+ labels=self.labels,
+ iscrowds=self.iscrowds,
+ bbox_format=bbox_format,
+                    num_gpus=self.num_gpus,
+ )
+ else:
+ self.evaluator = None
+
+ def __len__(self):
+ if self.mode == "val":
+ per_gpu_length = math.floor(
+ len(self.dataset) / (self.batch_size * self.num_gpus)
+ )
+ total_length = per_gpu_length * self.num_gpus
+ return total_length
+ return super().__len__()
+
+ def _get_images(self, images_tensor):
+ image_tensor = self.dataset[images_tensor]
+ return image_tensor
+
+ def _get_masks(self, masks_tensor):
+ if masks_tensor is None:
+ return []
+ return self.dataset[masks_tensor]
+
+ def _get_iscrowds(self, iscrowds_tensor):
+ if iscrowds_tensor is not None:
+ return iscrowds_tensor
+
+ if "iscrowds" in [col.name for col in self.dataset.schema.columns]:
+ always_warn(
+ "Iscrowds was not specified, searching for iscrowds tensor in the dataset."
+ )
+ return self.dataset["iscrowds"][:]
+ always_warn("iscrowds tensor was not found, setting its value to 0.")
+ return iscrowds_tensor
+
+ def _get_bboxes(self, boxes_tensor):
+ return self.dataset[boxes_tensor][:]
+
+ def _get_labels(self, labels_tensor):
+ return self.dataset[labels_tensor][:]
+
+ def _get_class_names(self, labels_tensor):
+ return self.dataset[labels_tensor].metadata["class_names"]
+
+ def get_ann_info(self, idx):
+ """Get annotation by index.
+
+ Args:
+ idx (int): Index of data.
+
+ Returns:
+ dict: Annotation info of specified index.
+ """
+ bboxes = convert_to_pascal_format(
+ self.bboxes[idx], self.bbox_info, self.images[idx].shape
+ )
+ return {"bboxes": bboxes, "labels": self.labels[idx]}
+
+ def get_cat_ids(self, idx):
+ """Get category ids by index.
+
+ Args:
+ idx (int): Index of data.
+
+ Returns:
+ list[int]: All categories in the image of specified index.
+ """
+
+        cat_ids = self.labels[idx].astype(int).tolist()
+
+ return cat_ids
+
+ def _filter_imgs(self, min_size=32):
+ """Filter images too small."""
+ if self.filter_empty_gt:
+ warnings.warn("CustomDataset does not support filtering empty gt images.")
+ valid_inds = []
+ for i, img_info in enumerate(self.data_infos):
+ if min(img_info["width"], img_info["height"]) >= min_size:
+ valid_inds.append(i)
+ return valid_inds
+
+ def get_classes(self, classes):
+ """Get class names of current dataset.
+
+ Args:
+            classes (str): Represents the name of the classes tensor. Overrides the CLASSES defined by the dataset.
+
+ Returns:
+ list[str]: Names of categories of the dataset.
+ """
+ return self.dataset[classes].metadata["class_names"]
+
+ def evaluate(
+ self,
+ results,
+ metric="mAP",
+ logger=None,
+ proposal_nums=(100, 300, 1000),
+        iou_thr=0.5,
+ scale_ranges=None,
+ **kwargs,
+ ):
+ """Evaluate the dataset.
+
+ Args:
+            results (list): Testing results of the dataset.
+            metric (str | list[str]): Metrics to be evaluated.
+            logger (logging.Logger | None | str): Logger used for printing
+                related information during evaluation. Default: None.
+            proposal_nums (Sequence[int]): Proposal number used for evaluating
+                recalls, such as recall@100, recall@1000.
+                Default: (100, 300, 1000).
+            iou_thr (float | list[float]): IoU threshold. Default: 0.5.
+            scale_ranges (list[tuple] | None): Scale ranges for evaluating mAP.
+                Default: None.
+            **kwargs (dict): Additional keyword arguments passed to the underlying evaluator.
+
+ Raises:
+ KeyError: if a specified metric format is not supported
+
+ Returns:
+ OrderedDict: Evaluation metrics dictionary
+ """
+ if self.num_gpus > 1:
+ results_ordered = []
+ for i in range(self.num_gpus):
+ results_ordered += results[i :: self.num_gpus]
+ results = results_ordered
+
+ if self.evaluator is None:
+ if not isinstance(metric, str):
+ assert len(metric) == 1
+ metric = metric[0]
+ allowed_metrics = ["mAP", "recall"]
+ if metric not in allowed_metrics:
+ raise KeyError(f"metric {metric} is not supported")
+ annotations = [
+ self.get_ann_info(i) for i in range(len(self))
+ ] # directly evaluate from hub
+ eval_results = OrderedDict()
+ iou_thrs = [iou_thr] if isinstance(iou_thr, float) else iou_thr
+ if metric == "mAP":
+ assert isinstance(iou_thrs, list)
+ mean_aps = []
+ for iou_thr in iou_thrs:
+ print_log(f'\n{"-" * 15}iou_thr: {iou_thr}{"-" * 15}')
+ mean_ap, _ = eval_map(
+ results,
+ annotations,
+ scale_ranges=scale_ranges,
+ iou_thr=iou_thr,
+ dataset=self.CLASSES,
+ logger=logger,
+ )
+ mean_aps.append(mean_ap)
+ eval_results[f"AP{int(iou_thr * 100):02d}"] = round(mean_ap, 3)
+ eval_results["mAP"] = sum(mean_aps) / len(mean_aps)
+ elif metric == "recall":
+ gt_bboxes = [ann["bboxes"] for ann in annotations] # evaluate from hub
+ recalls = eval_recalls(
+ gt_bboxes, results, proposal_nums, iou_thr, logger=logger
+ )
+ for i, num in enumerate(proposal_nums):
+ for j, iou in enumerate(iou_thrs):
+ eval_results[f"recall@{num}@{iou}"] = recalls[i, j]
+ if recalls.shape[1] > 1:
+ ar = recalls.mean(axis=1)
+ for i, num in enumerate(proposal_nums):
+ eval_results[f"AR@{num}"] = ar[i]
+ return eval_results
+
+ return self.evaluator.evaluate(
+ results,
+ metric=metric,
+ logger=logger,
+ proposal_nums=proposal_nums,
+ **kwargs,
+ )
+
+ @staticmethod
+ def _coco_2_pascal(boxes):
+        # Convert LTWH (COCO) boxes to Pascal VOC LTRB format. Note that no
+        # clipping is performed; width and height are assumed non-negative.
+ return np.stack(
+ (
+ boxes[:, 0],
+ boxes[:, 1],
+ boxes[:, 0] + boxes[:, 2],
+ boxes[:, 1] + boxes[:, 3],
+ ),
+ axis=1,
+ )
+
+ def __repr__(self):
+ """Print the number of instance number."""
+ dataset_type = "Test"
+ # if self.test_mode else "Train"
+ result = (
+ f"\n{self.__class__.__name__} {dataset_type} dataset "
+ f"with number of images {len(self)}, "
+ f"and instance counts: \n"
+ )
+ if self.CLASSES is None:
+ result += "Category names are not provided. \n"
+ return result
+ instance_count = np.zeros(len(self.CLASSES) + 1).astype(int)
+ # count the instance number in each image
+ for idx in range(len(self)):
+ label = self.get_ann_info(idx)["labels"] # change this
+ unique, counts = np.unique(label, return_counts=True)
+ if len(unique) > 0:
+ # add the occurrence number to each class
+ instance_count[unique] += counts
+ else:
+ # background is the last index
+ instance_count[-1] += 1
+ # create a table with category count
+ table_data = [["category", "count"] * 5]
+ row_data = []
+ for cls, count in enumerate(instance_count):
+ if cls < len(self.CLASSES):
+ row_data += [f"{cls} [{self.CLASSES[cls]}]", f"{count}"]
+ else:
+ # add the background number
+ row_data += ["-1 background", f"{count}"]
+ if len(row_data) == 10:
+ table_data.append(row_data)
+ row_data = []
+ if len(row_data) >= 2:
+ if row_data[-1] == "0":
+ row_data = row_data[:-2]
+ if len(row_data) >= 2:
+ table_data.append([])
+ table_data.append(row_data)
+
+ table = AsciiTable(table_data)
+ result += table.table
+ return result
+
+ def format_results(self, results, jsonfile_prefix=None, **kwargs):
+ """Format the results to json (standard format for COCO evaluation).
+
+ Args:
+ results (list[tuple | numpy.ndarray]): Testing results of the
+ dataset.
+ jsonfile_prefix (str | None): The prefix of json files. It includes
+ the file path and the prefix of filename, e.g., "a/b/prefix".
+ If not specified, a temp file will be created. Default: None.
+ kwargs (dict): Additional keyword arguments to be passed.
+
+ Returns:
+ tuple: (result_files, tmp_dir), result_files is a dict containing
+            the json filepaths, tmp_dir is the temporary directory created
+ for saving json files when jsonfile_prefix is not specified.
+ """
+ assert isinstance(results, list), "results must be a list"
+ assert len(results) == len(
+ self
+ ), "The length of results is not equal to the dataset len: {} != {}".format(
+ len(results), len(self)
+ )
+
+ if jsonfile_prefix is None:
+ tmp_dir = tempfile.TemporaryDirectory()
+ jsonfile_prefix = osp.join(tmp_dir.name, "results")
+ else:
+ tmp_dir = None
+ result_files = self.results2json(results, jsonfile_prefix)
+ return result_files, tmp_dir
diff --git a/python/deeplake/integrations/mmdet/mmdet_utils_.py b/python/deeplake/integrations/mmdet/mmdet_utils_.py
new file mode 100644
index 0000000000..741a1128a8
--- /dev/null
+++ b/python/deeplake/integrations/mmdet/mmdet_utils_.py
@@ -0,0 +1,498 @@
+import time
+import warnings
+import pycocotools # type: ignore
+import numpy as np
+import copy
+import itertools
+import pycocotools.mask as maskUtils # type: ignore
+from pycocotools import coco as pycocotools_coco # type: ignore
+from pycocotools import mask as _mask
+from collections import defaultdict
+import sys
+from typing import Union, Dict, List
+
+PYTHON_VERSION = sys.version_info[0]
+if PYTHON_VERSION == 2:
+ from urllib import urlretrieve # type: ignore
+elif PYTHON_VERSION == 3:
+ from urllib.request import urlretrieve
+from mmdet.datasets import coco as mmdet_coco # type: ignore
+from mmdet.datasets import pipelines
+from deeplake.integrations.mm.warnings import always_warn
+from deeplake.types import TypeKind
+import json
+import mmcv # type: ignore
+import math
+from tqdm import tqdm
+
+
+def _isArrayLike(obj):
+ return hasattr(obj, "__iter__") and hasattr(obj, "__len__")
+
+
+class _COCO(pycocotools_coco.COCO):
+ def __init__(
+ self,
+ deeplake_dataset=None,
+ imgs=None,
+ masks=None,
+ masks_type_kind=None,
+ bboxes=None,
+ labels=None,
+ iscrowds=None,
+ class_names=None,
+ bbox_format=("LTRB", "pixel"),
+ ):
+ """
+ Constructor of Microsoft COCO helper class for reading and visualizing annotations.
+ :param annotation_file (str): location of annotation file
+ :param image_folder (str): location to the folder that hosts images.
+ :return:
+ """
+ self.masks = masks
+ self.masks_type_kind = masks_type_kind
+ self.bboxes = bboxes
+ self.labels = labels
+ self.imgs_orig = imgs
+ self.iscrowds = iscrowds
+ self.class_names = class_names
+ self.bbox_format = bbox_format
+
+ # load dataset
+ self.anns, self.cats, self.imgs = dict(), dict(), dict()
+ self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
+ print("loading annotations into memory...")
+ self.dataset = deeplake_dataset
+ if self.dataset is not None:
+ self.createDeeplakeIndex()
+
+ def createDeeplakeIndex(self):
+ # create index
+ print("creating index...")
+ anns, cats, imgs = {}, {}, {}
+ imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
+ absolute_id = 0
+ all_categories = self.labels
+ all_bboxes = self.bboxes
+ all_masks = self.masks
+ all_imgs = self.imgs_orig
+ all_iscrowds = self.iscrowds
+
+ for row_index, row in tqdm(
+ enumerate(self.dataset),
+ desc="loading annotations",
+ total=len(self.dataset),
+ ):
+ if all_imgs[row_index].size == 0:
+ always_warn(
+ "found empty image, skipping it. Please verify that your dataset is not corrupted."
+ )
+ continue
+            categories = all_categories[row_index]  # make referencing custom
+ bboxes = all_bboxes[row_index]
+ if all_masks != [] and all_masks is not None:
+ masks = all_masks[row_index]
+ else:
+ masks = None
+ if all_iscrowds is not None:
+ is_crowds = all_iscrowds[row_index]
+ else:
+ is_crowds = np.zeros_like(categories)
+ img = {
+ "id": row_index,
+ "height": all_imgs[row_index].shape[0],
+ "width": all_imgs[row_index].shape[1],
+ }
+ imgs[row_index] = img
+ for bbox_index, bbox in enumerate(bboxes):
+ if self.masks is not None and self.masks != []:
+ if self.masks_type_kind == TypeKind.BinaryMask:
+ if masks.size == 0:
+ mask = _mask.encode(np.asfortranarray(masks[:]))
+ else:
+ mask = _mask.encode(
+ np.asfortranarray(masks[..., bbox_index])
+ )
+
+ elif self.masks_type_kind == TypeKind.Polygon:
+ mask = convert_poly_to_coco_format(masks[:][bbox_index])
+ else:
+ raise Exception(
+ f"type_kind={self.masks_type_kind} is not supported yet."
+ )
+ ann = {
+ "image_id": row_index,
+ "id": absolute_id,
+ "category_id": categories[bbox_index],
+ "bbox": bbox,
+ "area": bbox[2] * bbox[3],
+ "segmentation": (
+ mask if masks is not None else None
+ ), # optimize here
+ "iscrowd": int(is_crowds[bbox_index]),
+ }
+
+ imgToAnns[row_index].append(ann)
+ anns[absolute_id] = ann
+ absolute_id += 1
+
+ category_names = self.class_names # TO DO: add super category names
+ category_names = [
+ {"id": cat_id, "name": name} for cat_id, name in enumerate(category_names)
+ ]
+
+ for idx, category_name in enumerate(category_names):
+ cats[idx] = category_name
+
+ for ann in anns.values():
+ catToImgs[ann["category_id"]].append(ann["image_id"])
+
+ # create class members
+ self.anns = anns
+ self.imgToAnns = imgToAnns
+ self.catToImgs = catToImgs
+ self.imgs = imgs
+ self.cats = cats
+ print("create index done!")
+
+ def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
+ """
+ Get ann ids that satisfy given filter conditions. default skips that filter
+ :param imgIds (int array) : get anns for given imgs
+ catIds (int array) : get anns for given cats
+ areaRng (float array) : get anns for given area range (e.g. [0 inf])
+ iscrowd (boolean) : get anns for given crowd label (False or True)
+ :return: ids (int array) : integer array of ann ids
+ """
+ imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+ catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+ if len(imgIds) == len(catIds) == len(areaRng) == 0:
+ anns = list(self.anns.values())
+ else:
+ if not len(imgIds) == 0:
+ lists = [
+ self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns
+ ]
+ anns = list(itertools.chain.from_iterable(lists))
+ else:
+ anns = list(self.anns.values())
+ anns = (
+ anns
+ if len(catIds) == 0
+ else [ann for ann in anns if ann["category_id"] in catIds]
+ )
+ anns = (
+ anns
+ if len(areaRng) == 0
+ else [
+ ann
+ for ann in anns
+ if ann["area"] > areaRng[0] and ann["area"] < areaRng[1]
+ ]
+ )
+        if iscrowd is not None:
+            ids = [ann["id"] for ann in anns if ann["iscrowd"] == iscrowd]
+ else:
+ ids = [ann["id"] for ann in anns]
+ return ids
+
+ def getCatIds(self, catNms: List = [], supNms: List = [], catIds: List = []):
+ """Filtering parameters.
+
+ Args:
+ catNms (List): get cats for given cat names
+ supNms (List): get classes for given supercategory names
+ catIds (List): get cats for given cat ids
+
+ Returns:
+ ids (List[int]): integer array of cat ids
+ """
+ catNms = catNms if _isArrayLike(catNms) else [catNms]
+ supNms = supNms if _isArrayLike(supNms) else [supNms]
+ catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+ if len(catNms) == len(supNms) == len(catIds) == 0:
+ cats = list(self.cats.values())
+ else:
+ cats = list(self.cats.values())
+ cats = (
+ cats
+ if len(catNms) == 0
+ else [cat for cat in cats if cat["name"] in catNms]
+ )
+ cats = (
+ cats
+ if len(supNms) == 0
+ else [cat for cat in cats if cat["supercategory"] in supNms]
+ )
+ cats = (
+ cats
+ if len(catIds) == 0
+ else [cat for cat in cats if cat["id"] in catIds]
+ )
+ ids = [cat["id"] for cat in cats]
+ return ids
+
+ def loadRes(self, resFile):
+ """
+ Load result file and return a result api object.
+ :param resFile (str) : file name of result file
+ :return: res (obj) : result api object
+ """
+ res = _COCO()
+ res.dataset = {}
+ res.dataset["images"] = [img for img in list(self.imgs.values())]
+
+ print("Loading and preparing results...")
+ tic = time.time()
+ if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode):
+ with open(resFile) as f:
+ anns = json.load(f)
+ elif type(resFile) == np.ndarray:
+ anns = self.loadNumpyAnnotations(resFile)
+ else:
+ anns = resFile
+        assert type(anns) == list, "results is not an array of objects"
+ annsImgIds = [ann["image_id"] for ann in anns]
+ assert set(annsImgIds) == (
+ set(annsImgIds) & set(self.getImgIds())
+ ), "Results do not correspond to current coco set"
+ if "caption" in anns[0]:
+ imgIds = set([img["id"] for img in res.dataset["images"]]) & set(
+ [ann["image_id"] for ann in anns]
+ )
+ res.dataset["images"] = [
+ img for img in res.dataset["images"] if img["id"] in imgIds
+ ]
+ for id, ann in enumerate(anns):
+ ann["id"] = id + 1
+ elif "bbox" in anns[0] and not anns[0]["bbox"] == []:
+ res.dataset["categories"] = copy.deepcopy(list(self.cats.values()))
+ for id, ann in enumerate(anns):
+ bb = ann["bbox"]
+ x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+ if not "segmentation" in ann:
+ ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+ ann["area"] = bb[2] * bb[3]
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "segmentation" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(list(self.cats.values()))
+ for id, ann in enumerate(anns):
+ # now only support compressed RLE format as segmentation results
+ ann["area"] = maskUtils.area(ann["segmentation"])
+ if not "bbox" in ann:
+ ann["bbox"] = maskUtils.toBbox(ann["segmentation"])
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "keypoints" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(list(self.cats.values()))
+ for id, ann in enumerate(anns):
+ s = ann["keypoints"]
+ x = s[0::3]
+ y = s[1::3]
+ x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+ ann["area"] = (x1 - x0) * (y1 - y0)
+ ann["id"] = id + 1
+ ann["bbox"] = [x0, y0, x1 - x0, y1 - y0]
+ print("DONE (t={:0.2f}s)".format(time.time() - tic))
+
+ res.dataset["annotations"] = anns
+ res.createIndex()
+ return res
+
+
+class DeeplakeCOCO(_COCO):
+ """This class is almost the same as official pycocotools package.
+
+ It implements some snake case function aliases. So that the COCO class has
+ the same interface as LVIS class.
+ """
+
+ def __init__(
+ self,
+ deeplake_dataset=None,
+ imgs=None,
+ masks=None,
+ masks_type_kind=None,
+ bboxes=None,
+ labels=None,
+ iscrowds=None,
+ class_names=None,
+ bbox_format=("LTRB", "pixel"),
+ ):
+ if getattr(pycocotools, "__version__", "0") >= "12.0.2":
+ warnings.warn(
+ 'mmpycocotools is deprecated. Please install official pycocotools by "pip install pycocotools"', # noqa: E501
+ UserWarning,
+ )
+ super().__init__(
+ deeplake_dataset=deeplake_dataset,
+ imgs=imgs,
+ masks=masks,
+ masks_type_kind=masks_type_kind,
+ labels=labels,
+ bboxes=bboxes,
+ iscrowds=iscrowds,
+ class_names=class_names,
+ bbox_format=bbox_format,
+ )
+ self.img_ann_map = self.imgToAnns
+ self.cat_img_map = self.catToImgs
+
+ def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None):
+ return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd)
+
+ def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]):
+ return self.getCatIds(cat_names, sup_names, cat_ids)
+
+ def get_img_ids(self, img_ids=[], cat_ids=[]):
+ return self.getImgIds(img_ids, cat_ids)
+
+ def load_anns(self, ids):
+ return self.loadAnns(ids)
+
+ def load_cats(self, ids):
+ return self.loadCats(ids)
+
+ def load_imgs(self, ids):
+ return self.loadImgs(ids)
+
+
+class COCODatasetEvaluater(mmdet_coco.CocoDataset):
+ def __init__(
+ self,
+ pipeline,
+ deeplake_dataset=None,
+ classes=None,
+ img_prefix="",
+ seg_prefix=None,
+ seg_suffix=".png",
+ proposal_file=None,
+ test_mode=True,
+ filter_empty_gt=True,
+ file_client_args=dict(backend="disk"),
+ imgs=None,
+ masks=None,
+ masks_type_kind=None,
+ bboxes=None,
+ labels=None,
+ iscrowds=None,
+ bbox_format=None,
+ batch_size=1,
+ num_gpus=1,
+ ):
+ self.img_prefix = img_prefix
+ self.seg_prefix = seg_prefix
+ self.seg_suffix = seg_suffix
+ self.proposal_file = proposal_file
+ self.test_mode = test_mode
+ self.filter_empty_gt = filter_empty_gt
+ self.file_client = mmcv.FileClient(**file_client_args)
+ self.CLASSES = classes
+ self.batch_size = batch_size
+ self.num_gpus = num_gpus
+ self.masks_type_kind = masks_type_kind
+
+ self.data_infos = self.load_annotations(
+ deeplake_dataset,
+ imgs=imgs,
+ labels=labels,
+ masks=masks,
+ masks_type_kind=self.masks_type_kind,
+ bboxes=bboxes,
+ iscrowds=iscrowds,
+ class_names=self.CLASSES,
+ bbox_format=bbox_format,
+ )
+ self.proposals = None
+
+ # filter images too small and containing no annotations
+ if not test_mode:
+ valid_inds = self._filter_imgs()
+ self.data_infos = [self.data_infos[i] for i in valid_inds]
+ if self.proposals is not None:
+ self.proposals = [self.proposals[i] for i in valid_inds]
+ # set group flag for the sampler
+ self._set_group_flag()
+
+ # processing pipeline
+
+ def pipeline(self, x):
+ return x
+
+ def __len__(self):
+ length = super().__len__()
+ per_gpu_length = math.floor(length / (self.batch_size * self.num_gpus))
+ total_length = per_gpu_length * self.num_gpus
+ return total_length
+
+ def load_annotations(
+ self,
+ deeplake_dataset,
+ imgs=None,
+ labels=None,
+ masks=None,
+ masks_type_kind=None,
+ bboxes=None,
+ iscrowds=None,
+ class_names=None,
+ bbox_format=None,
+ ):
+ """Load annotation from COCO style annotation file.
+
+ Args:
+ deeplake_dataset (dp.Dataset): Deeplake dataset object.
+ imgs (dp.Tensor): image deeplake tensor.
+            labels (List[numpy]): List of labels for every detection in each image, in numpy format.
+            masks (List[numpy]): List of masks for every detection in each image, in numpy format.
+            masks_type_kind: Type kind of the masks tensor (e.g. binary mask or polygon).
+            bboxes (List[numpy]): List of bboxes for every detection in each image, in numpy format.
+            iscrowds (List[numpy]): List of iscrowd flags for every detection in each image, in numpy format.
+            class_names (List[str]): List of class names for the detections in each image.
+            bbox_format (Tuple[str, str]): Tuple containing bbox format information, e.g. ("LTRB", "pixel").
+
+ Returns:
+ list[dict]: Annotation info from COCO api.
+ """
+
+ self.coco = DeeplakeCOCO(
+ deeplake_dataset,
+ imgs=imgs,
+ labels=labels,
+ bboxes=bboxes,
+ masks=masks,
+ masks_type_kind=masks_type_kind,
+ iscrowds=iscrowds,
+ class_names=class_names,
+ bbox_format=bbox_format,
+ )
+ # The order of returned `cat_ids` will not
+ # change with the order of the CLASSES
+ self.cat_ids = self.coco.get_cat_ids(cat_names=self.CLASSES)
+
+ self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
+ self.img_ids = self.coco.get_img_ids()
+ data_infos = []
+ total_ann_ids = []
+ for i in self.img_ids:
+ info = self.coco.load_imgs([i])[0]
+ data_infos.append(info)
+ ann_ids = self.coco.get_ann_ids(img_ids=[i])
+ total_ann_ids.extend(ann_ids)
+ assert len(set(total_ann_ids)) == len(total_ann_ids)
+ return data_infos
+
+
+def convert_poly_to_coco_format(masks):
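+    """Convert polygon point arrays to COCO-style flat coordinate lists,
+    shifting each vertex by 0.5 to the pixel center.
+
+    Example (illustrative; a single triangle given as (x, y) rows):
+        >>> convert_poly_to_coco_format(np.array([[0, 0], [4, 0], [4, 3]]))
+        [[0.5, 0.5, 4.5, 0.5, 4.5, 3.5]]
+    """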
+ if isinstance(masks, np.ndarray):
+ px = masks[..., 0]
+ py = masks[..., 1]
+ poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
+ poly = [[float(p) for x in poly for p in x]]
+ return poly
+ poly = []
+ for mask in masks:
+ poly_i = convert_poly_to_coco_format(mask)
+ poly.append([np.array(poly_i[0])])
+ return poly
diff --git a/python/deeplake/integrations/mmdet/test_.py b/python/deeplake/integrations/mmdet/test_.py
new file mode 100644
index 0000000000..c574bbc253
--- /dev/null
+++ b/python/deeplake/integrations/mmdet/test_.py
@@ -0,0 +1,225 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import pickle
+import shutil
+import tempfile
+import time
+
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv.image import tensor2imgs
+from mmcv.runner import get_dist_info
+
+from mmdet.core import encode_mask_results
+from mmdet.utils import get_device
+
+
+def single_gpu_test(
+ model,
+ data_loader,
+ show=False,
+ out_dir=None,
+ show_score_thr=0.3,
+ show_box_only=False,
+ show_mask_only=False,
+):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset.mmdet_dataset
+ PALETTE = getattr(dataset, "PALETTE", None)
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+
+ batch_size = len(result)
+ if show or out_dir:
+ if batch_size == 1 and isinstance(data["img"][0], torch.Tensor):
+ img_tensor = data["img"][0]
+ else:
+ img_tensor = data["img"][0].data[0]
+ img_metas = data["img_metas"][0].data[0]
+ imgs = tensor2imgs(img_tensor, **img_metas[0]["img_norm_cfg"])
+ assert len(imgs) == len(img_metas)
+
+ for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
+ h, w, _ = img_meta["img_shape"]
+ img_show = img[:h, :w, :]
+
+ ori_h, ori_w = img_meta["ori_shape"][:-1]
+ img_show = mmcv.imresize(img_show, (ori_w, ori_h))
+
+ if out_dir:
+ out_file = osp.join(out_dir, img_meta["ori_filename"])
+ else:
+ out_file = None
+
+ model.module.show_result(
+ img_show,
+ result[i],
+ bbox_color=PALETTE,
+ text_color=PALETTE,
+ mask_color=PALETTE,
+ show=show,
+ out_file=out_file,
+ score_thr=show_score_thr,
+ show_box_only=show_box_only,
+ show_mask_only=show_mask_only,
+ )
+
+ # encode mask results
+ if isinstance(result[0], tuple):
+ result = [
+ (bbox_results, encode_mask_results(mask_results))
+ for bbox_results, mask_results in result
+ ]
+ # This logic is only used in panoptic segmentation test.
+ elif isinstance(result[0], dict) and "ins_results" in result[0]:
+ for j in range(len(result)):
+ bbox_results, mask_results = result[j]["ins_results"]
+ result[j]["ins_results"] = (
+ bbox_results,
+ encode_mask_results(mask_results),
+ )
+
+ results.extend(result)
+
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
+ """Test model with multiple gpus.
+
+ This method tests model with multiple gpus and collects the results
+ under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
+ it encodes results to gpu tensors and use gpu communication for results
+ collection. On cpu mode it saves the results on different gpus to 'tmpdir'
+ and collects them by the rank 0 worker.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode.
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
+
+ Returns:
+ list: The prediction results.
+ """
+ model.eval()
+ results = []
+ dataset = data_loader.dataset.mmdet_dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ time.sleep(2) # This line can prevent deadlock problem in some cases.
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+ # encode mask results
+ if isinstance(result[0], tuple):
+ result = [
+ (bbox_results, encode_mask_results(mask_results))
+ for bbox_results, mask_results in result
+ ]
+ # This logic is only used in panoptic segmentation test.
+ elif isinstance(result[0], dict) and "ins_results" in result[0]:
+ for j in range(len(result)):
+ bbox_results, mask_results = result[j]["ins_results"]
+ result[j]["ins_results"] = (
+ bbox_results,
+ encode_mask_results(mask_results),
+ )
+
+ results.extend(result)
+
+ if rank == 0:
+ batch_size = len(result)
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ if gpu_collect:
+ results = collect_results_gpu(results, len(dataset))
+ else:
+ results = collect_results_cpu(results, len(dataset), tmpdir)
+ return results
+
+
+def collect_results_cpu(result_part, size, tmpdir=None):
+ rank, world_size = get_dist_info()
+ default_device = get_device()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full(
+ (MAX_LEN,), 32, dtype=torch.uint8, device=default_device
+ )
+ if rank == 0:
+ mmcv.mkdir_or_exist(".dist_test")
+ tmpdir = tempfile.mkdtemp(dir=".dist_test")
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device=default_device
+ )
+ dir_tensor[: len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl"))
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+ else:
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, f"part_{i}.pkl")
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def collect_results_gpu(result_part, size):
+ rank, world_size = get_dist_info()
+ default_device = get_device()
+ # dump result part to tensor with pickle
+ part_tensor = torch.tensor(
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device=default_device
+ )
+ # gather all result part tensor shape
+ shape_tensor = torch.tensor(part_tensor.shape, device=default_device)
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
+ dist.all_gather(shape_list, shape_tensor)
+ # padding result part tensor to max length
+ shape_max = torch.tensor(shape_list).max()
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device=default_device)
+ part_send[: shape_tensor[0]] = part_tensor
+ part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)]
+ # gather all result part
+ dist.all_gather(part_recv_list, part_send)
+
+ if rank == 0:
+ part_list = []
+ for recv, shape in zip(part_recv_list, shape_list):
+ part_list.append(pickle.loads(recv[: shape[0]].cpu().numpy().tobytes()))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ return ordered_results
diff --git a/python/deeplake/integrations/mmseg/__init__.py b/python/deeplake/integrations/mmseg/__init__.py
new file mode 100644
index 0000000000..29d7586f84
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/__init__.py
@@ -0,0 +1,2 @@
+from deeplake.integrations.mmseg.mmseg_ import train_segmentor
+from mmseg.models import build_segmentor # type: ignore
diff --git a/python/deeplake/integrations/mmseg/compose_transform_.py b/python/deeplake/integrations/mmseg/compose_transform_.py
new file mode 100644
index 0000000000..0dabdc657b
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/compose_transform_.py
@@ -0,0 +1,78 @@
+import io
+import numpy as np
+from PIL import Image  # type: ignore  # needed below to decode raw image bytes
+from typing import Callable, Optional, List
+from functools import partial
+
+from deeplake.integrations.mm.exceptions import InvalidImageError, InvalidSegmentError
+from deeplake.integrations.mm.upcast_array import upcast_array
+from mmcv.utils import build_from_cfg
+from mmseg.datasets.builder import PIPELINES # type: ignore
+from mmseg.datasets.pipelines import Compose # type: ignore
+
+
+def build_pipeline(steps):
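+    # Deep Lake streams decoded arrays straight into the pipeline, so the
+    # file-loading steps from the original MMSeg config are filtered out here.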
+ return Compose(
+ [
+ build_from_cfg(step, PIPELINES, None)
+ for step in steps
+ if step["type"] not in {"LoadImageFromFile", "LoadAnnotations"}
+ ]
+ )
+
+
+def transform(
+ sample_in,
+ images_tensor: str,
+ masks_tensor: str,
+ pipeline: Callable,
+):
+ try:
+ img = upcast_array(sample_in[images_tensor])
+ except Exception as e:
+ raise InvalidImageError(images_tensor, e)
+ if isinstance(img, (bytes, bytearray)):
+ img = np.array(Image.open(io.BytesIO(img)))
+ elif not isinstance(img, np.ndarray):
+ img = np.array(img)
+
+ try:
+ mask = sample_in[masks_tensor]
+ except Exception as e:
+        raise InvalidSegmentError(masks_tensor, e)
+ if not isinstance(mask, np.ndarray):
+ mask = np.array(mask)
+
+ if img.ndim == 2:
+ img = np.expand_dims(img, -1)
+
+ img = img[..., ::-1] # rgb_to_bgr should be optional
+ if img.shape[2] == 1:
+ img = np.repeat(img, 3, axis=2)
+ shape = img.shape
+
+ pipeline_dict = {
+ "img": np.ascontiguousarray(img, dtype=np.float32),
+ "img_fields": ["img"],
+ "filename": None,
+ "ori_filename": None,
+ "img_shape": shape,
+ "ori_shape": shape,
+ "gt_semantic_seg": np.ascontiguousarray(mask, np.int64),
+ "seg_fields": ["gt_semantic_seg"],
+ }
+
+ return pipeline(pipeline_dict)
+
+
+def compose_transform(
+ images_tensor: str,
+ masks_tensor: Optional[str],
+ pipeline: List,
+):
+ pipeline = build_pipeline(pipeline)
+ return partial(
+ transform,
+ images_tensor=images_tensor,
+ masks_tensor=masks_tensor,
+ pipeline=pipeline,
+ )
diff --git a/python/deeplake/integrations/mmseg/mmseg_.py b/python/deeplake/integrations/mmseg/mmseg_.py
new file mode 100644
index 0000000000..be709fdbe7
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/mmseg_.py
@@ -0,0 +1,739 @@
+"""
+Deep Lake offers an integration with MMSegmentation, a popular open-source semantic segmentation toolbox based on PyTorch.
+The integration enables users to train models while streaming Deep Lake dataset using the transformation, training, and evaluation tools built by MMSeg.
+
+Learn more about MMSegmentation `here <https://github.com/open-mmlab/mmsegmentation>`_.
+
+Integration Interface
+~~~~~~~~~~~~~~~~~~~~~
+MMSegmentation works with configs. Deeplake adopted this strategy, and in order to train MMSeg models, you need to create/specify your model
+and training/validation config. The Deep Lake integration's logic is almost the same as MMSegmentation's, with some minor modifications. The integration
+with MMSeg lives in the deeplake.integrations.mmseg module. At a high level, Deep Lake is responsible for the PyTorch dataloader that streams data
+to the training framework, while MMSeg is used for the training, transformation, and evaluation logic. Let us take a look at the config with deeplake changes:
+
+Deeplake integration requires the following parameters to be specified in the configuration file:
+
+- ``data``: Just like in the MMSegmentation configuration files, in data dictionary you can specify everything that you want to be applied to the data during training and validation
+ - ``train``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the training data
+ - ``val``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the validation data
+ - ``pipeline``: List of transformations. This parameter exists for train as well as for val.
+
+ - Example:
+
+ >>> pipeline = [dict(type="Resize", img_scale=[(320, 320), (608, 608)], keep_ratio=True), dict(type="RandomFlip", flip_ratio=0.5), dict(type="PhotoMetricDistortion")]
+
+ - ``deeplake_path``: Path to the deeplake dataset. This parameter exists for train as well as for val.
+    - ``deeplake_credentials``: Optional parameter. Required only when using private nonlocal datasets. See the documentation for `deeplake.open_read_only() <https://docs.deeplake.ai/latest/api/dataset/#deeplake.open_read_only>`_ for details. This parameter exists for train as well as for val.
+    - ``deeplake_tag_id``: Optional parameter. If specified, the dataset will check out to the given tag. This parameter exists for train as well as for val. See the documentation for ``Dataset.commit_id`` for details.
+    - ``deeplake_query``: Optional parameter. If specified, the query is used to load the dataset when ``deeplake_path`` is not provided; otherwise it is applied to the dataset loaded from ``deeplake_path``.
+ - ``deeplake_tensors``: Optional parameter. If specified maps MMSegmentation tensors to the associated tensors in the dataset. MMSeg tensors are: "img", "gt_semantic_seg". This parameter exists for train as well as for val.
+ - ``"img"``: Stands for image tensor.
+ - ``"gt_semantic_seg"``: Stands for semantic segmenataion tensor.
+
+ - ``deeplake_dataloader``: Optional parameter. If specified represents the parameters of the deeplake dataloader. Deeplake dataloader parameters are: "shuffle", "batch_size", "num_workers". This parameter exists for train as well as for val.
+ - ``"shuffle"``: If ``True`` shuffles the dataset.
+ - ``"batch_size"``: Size of batch. If not specified, dataloader will use ``samples_per_gpu``.
+ - ``"num_workers"``: Number of workers to use. If not specified, dataloader will use ``workers_per_gpu``.
+
+- ``evaluation``: Dictionary specifying the evaluation metric and interval, for example:
+
+>>> evaluation = dict(metric=["mIoU"], interval=1)
+
+- ``train_segmentor``: Function to train the MMSegmentation model.
+
+ Parameters:
+
+ - ``model``: MMSegmentation model that is going to be used.
+    - ``cfg``: mmcv.ConfigDict, configuration of the model as well as of the datasets and transforms that are going to be used.
+ - ``ds_train``: Optional parameter. If provided will overwrite deeplake_path in train, and will pass this tensor directly to the dataloader.
+ - ``ds_val``: Optional parameter. If provided will overwrite deeplake_path in val, and will pass this tensor directly to the dataloader.
+ - ``ds_train_tensors``: Optional parameter. If provided will overwrite deeplake_tensors in train, and will pass this tensor mapping directly to dataloader.
+ - ``ds_val_tensors``: Optional parameter. If provided will overwrite deeplake_tensors in val, and will pass this tensor mapping directly to dataloader.
+    - ``distributed``: Optional parameter. If ``True``, the code runs on all available gpus.
+    - ``meta``: Meta data used to build the runner.
+ - ``timestamp``: Variable used in runner to make .log and .log.json filenames the same.
+ - ``validate``: Bool, whether validation should be run, defaults to ``True``.
+
+
+MMSegmentation Config Examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Below is the example of the deeplake mmseg configuration:
+
+
+>>> _base_ = "../mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k.py"
+>>> # use caffe img_norm
+>>> img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)
+>>> train_pipeline = [
+... dict(type='LoadImageFromFile'),
+... dict(type='LoadAnnotations'),
+... dict(
+... type='Expand',
+... mean=img_norm_cfg['mean'],
+... to_rgb=img_norm_cfg['to_rgb'],
+... ratio_range=(1, 2)),
+... dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True),
+... dict(type='RandomFlip', flip_ratio=0.0),
+... dict(type='PhotoMetricDistortion'),
+... dict(type='Normalize', **img_norm_cfg),
+... dict(type='Pad', size_divisor=32),
+... dict(type='DefaultFormatBundle'),
+... dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+... ]
+>>> test_pipeline = [
+... dict(type='LoadImageFromFile'),
+... dict(
+... type='MultiScaleFlipAug',
+... img_scale=(416, 416),
+... flip=False,
+... transforms=[
+... dict(type='Resize', keep_ratio=True),
+... dict(type='RandomFlip', flip_ratio=0.0),
+... dict(type='Normalize', **img_norm_cfg),
+... dict(type='Pad', size_divisor=32),
+... dict(type='ImageToTensor', keys=['img']),
+... dict(type='Collect', keys=['img'])
+... ])
+... ]
+>>> #--------------------------------------DEEPLAKE INPUTS------------------------------------------------------------#
+>>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN"
+>>> data = dict(
+... # samples_per_gpu=4, # Is used instead of batch_size if deeplake_dataloader is not specified below
+... # workers_per_gpu=8, # Is used instead of num_workers if deeplake_dataloader is not specified below
+... train=dict(
+... pipeline=train_pipeline,
+...         # Credentials for authentication. See documentation for deeplake.load() for details
+... deeplake_path="hub://activeloop/semantic-seg-train",
+... deeplake_credentials={
+... "token": TOKEN,
+... "creds": None,
+... },
+... #OPTIONAL - Checkout the specified commit_id before training
+... deeplake_commit_id="",
+... #OPTIONAL - Loads a dataset tag for training based on tag_id
+... deeplake_tag_id="",
+... # OPTIONAL - {"mmseg_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMSeg dictionary keys.
+... # If not specified, Deep Lake will auto-infer the mapping, but it might make mistakes if datasets have many tensors
+... deeplake_tensors = {"img": "images", "gt_semantic_seg": "semantic_seg"},
+... # OPTIONAL - Parameters to use for the Deep Lake dataloader. If unspecified, the integration uses
+... # the parameters in other parts of the cfg file such as samples_per_gpu, and others.
+... deeplake_dataloader = {"shuffle": True, "batch_size": 4, 'num_workers': 8}
+... ),
+... # Parameters as the same as for train
+... val=dict(
+... pipeline=test_pipeline,
+... deeplake_path="hub://activeloop/semantic-seg-val",
+... deeplake_credentials={
+... "token": TOKEN,
+... "creds": None,
+... },
+... deeplake_tensors = {"img": "images", "gt_semantic_seg": "semantic_seg"},
+... deeplake_dataloader = {"shuffle": False, "batch_size": 1, 'num_workers': 8}
+... ),
+... )
+>>> # Which dataloader to use
+>>> # Which metrics to use for evaluation. In MMSeg (without Deeplake), this is inferred from the dataset type.
+>>> # In the Deep Lake integration, since the format is standardized, a variety of metrics can be used for a given dataset.
+>>> #----------------------------------END DEEPLAKE INPUTS------------------------------------------------------------#
+
+And config for training:
+
+>>> import os
+>>> from mmcv import Config
+>>> import mmcv
+>>> from deeplake.integrations import mmseg as mmseg_deeplake
+>>> cfg = Config.fromfile(cfg_file)
+>>> # Build the segmentor
+>>> model = mmseg_deeplake.build_segmentor(cfg.model)
+>>> # Create work_dir
+>>> mmcv.mkdir_or_exist(os.path.abspath(cfg.work_dir))
+>>> # Run the training
+>>> mmseg_deeplake.train_segmentor(model, cfg, distributed=args.distributed, validate=args.validate)
+"""
+
+import warnings
+import torch
+import numpy as np
+import io
+import math
+import types
+from functools import partial
+
+from typing import Callable, Optional, List, Dict, Sequence, Union
+from PIL import Image # type: ignore
+
+from mmseg.core import DistEvalHook, EvalHook # type: ignore
+from mmseg.core import build_optimizer
+from mmseg.utils import ( # type: ignore
+ build_dp,
+ find_latest_checkpoint,
+ get_root_logger,
+)
+
+from mmseg.datasets.samplers import DistributedSampler # type: ignore
+from mmseg.utils.util_distribution import * # type: ignore
+from deeplake.integrations.mm.get_indexes import get_indexes
+from deeplake.integrations.mm.worker_init_fn import worker_init_fn
+from deeplake.integrations.mm.ipc import _get_free_port
+from deeplake.integrations.mm.exceptions import ValidationDatasetMissingError
+
+from mmcv.utils import build_from_cfg, digit_version # type: ignore
+from mmcv.parallel import collate # type: ignore
+import mmcv # type: ignore
+from mmcv.runner import init_dist # type: ignore
+from mmcv.runner import ( # type: ignore
+ DistSamplerSeedHook,
+ EpochBasedRunner,
+ OptimizerHook,
+ build_runner,
+ get_dist_info,
+ HOOKS,
+)
+
+
+import deeplake as dp
+from deeplake.types import TypeKind
+from deeplake.integrations.mm.warnings import always_warn
+
+from deeplake.integrations.mm.mm_runners import DeeplakeIterBasedRunner
+from deeplake.integrations.mm.mm_common import (
+ load_ds_from_cfg,
+ get_collect_keys,
+ check_persistent_workers,
+ find_image_tensor,
+ find_smask_tensor,
+ ddp_setup,
+ force_cudnn_initialization,
+ check_unsupported_functionalities,
+ get_pipeline,
+)
+from deeplake.integrations.mmseg.mmseg_dataset_ import MMSegDataset, MMSegTorchDataset
+from deeplake.integrations.mmseg.compose_transform_ import compose_transform
+
+from torch.utils.data import DataLoader, IterableDataset
+
+
+# Monkey-patch the function
+from deeplake.integrations.mmseg.test_ import single_gpu_test as custom_single_gpu_test
+from deeplake.integrations.mmseg.test_ import multi_gpu_test as custom_multi_gpu_test
+
+import mmseg.apis
+
+mmseg.apis.single_gpu_test = custom_single_gpu_test
+mmseg.apis.multi_gpu_test = custom_multi_gpu_test
+
+
+def build_ddp(model, device, *args, **kwargs):
+ """Build DistributedDataParallel module by device type.
+
+ If device is cuda, return a MMDistributedDataParallel model;
+ if device is mlu, return a MLUDistributedDataParallel model.
+
+ Args:
+ model (:class:`nn.Module`): module to be parallelized.
+ device (str): device type, mlu or cuda.
+ args (List): arguments to be passed to ddp_factory
+ kwargs (dict): keyword arguments to be passed to ddp_factory
+
+ Returns:
+ :class:`nn.Module`: the module to be parallelized
+
+ References:
+ .. [1] https://pytorch.org/docs/stable/generated/torch.nn.parallel.
+ DistributedDataParallel.html
+ """
+
+ assert device in ["cuda", "mlu"], "Only available for cuda or mlu devices."
+ if device == "cuda":
+ model = model.cuda(kwargs["device_ids"][0]) # patch
+ elif device == "mlu":
+ from mmcv.device.mlu import MLUDistributedDataParallel # type: ignore
+
+ ddp_factory["mlu"] = MLUDistributedDataParallel
+ model = model.mlu()
+
+ return ddp_factory[device](model, *args, **kwargs)
+
+
+def mmseg_subiterable_dataset_eval(
+ self,
+ *args,
+ **kwargs,
+):
+ return self.dataset.mmseg_dataset.evaluate(*args, **kwargs)
+
+
+def train_segmentor(
+ model,
+ cfg: mmcv.ConfigDict,
+ ds_train=None,
+ ds_train_tensors=None,
+ ds_val: Optional[dp.Dataset] = None,
+ ds_val_tensors=None,
+ distributed: bool = False,
+ timestamp=None,
+ meta=None,
+ validate: bool = True,
+):
+ """
+    Creates the runner, then trains and evaluates the model:
+ Args:
+ model: model to train, should be built before passing
+ cfg: mmcv.ConfigDict object containing all necessary configuration.
+ In cfg we have several changes to support deeplake integration:
+ _base_: still serves as a base model to inherit from
+            data: holds everything related to data processing; you will need to specify the following parameters:
+ train: everything related to training data, it has the following attributes:
+                    pipeline: dictionary where all training augmentations and transformations should be specified, like in mmseg
+                    deeplake_tensors: dictionary that maps mmseg keys to deeplake dataset tensors. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+                        If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+                        The keys that need to be mapped are `img` and `gt_semantic_seg`; both are always required, and if they are not specified they
+                        are searched automatically. If you include `gt_semantic_seg` in collect, you need to either specify it in the config or it will be searched based on the
+                        `segment_mask` htype.
+ deeplake_credentials: dictionary with deeplake credentials that allow you to access the specified data. It has following arguments: `token`.
+ `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+ val (Optional): everything related to validating data, it has the following attributes:
+ pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+ deeplake_tensors: dictionary that maps mmseg keys to deeplake dataset tensor. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+ If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+ keys that needs to be mapped are: `img` and "gt_semantic_seg". `img` and `gt_semantic_seg` are always required, if they not specified they
+ are always searched, if you specify in collect `gt_semantic_seg` then you need to either specify it in config or it will be searched based on
+ `segment_mask` htype.
+ deeplake_credentials: deeplake credentials that allow you to access the specified data. It has following arguments: `token`.
+ `token` is the token that gives you read or write access to the datasets. It is available in your personal account on: https://www.activeloop.ai/.
+ test (Optional): everything related to testing data, it has the following attributes:
+ pipeline: dictionary where all training augmentations and transformations should be specified, like in mmdet
+ deeplake_tensors: dictionary that maps mmseg keys to deeplake dataset tensor. Example: `{"img": "images", "gt_semantic_seg": "semantic_seg"}`.
+ If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+ keys that needs to be mapped are: `img` and "gt_semantic_seg". `img` and `gt_semantic_seg` are always required, if they not specified they
+ are always searched, if you specify in collect `gt_semantic_seg` then you need to either specify it in config or it will be searched based on
+ `segment_mask` htype.
+ deeplake_credentials: deeplake credentials that allow you to access the specified data. It has following arguments: `token`.
+ `token` is the token that gives you read or write access to the datasets. It is available in your personal acccount on: https://www.activeloop.ai/.
+ samples_per_gpu: number of samples to be processed per gpu
+ workers_per_gpu: number of workers per gpu
+ optimizer: dictionary containing information about optimizer initialization
+ optimizer_config: some optimizer configuration that might be used during training like grad_clip etc.
+ runner: training type e.g. EpochBasedRunner, here you can specify maximum number of epochs to be conducted. For instance: `runner = dict(type='EpochBasedRunner', max_epochs=273)`
+ ds_train: train dataset of type dp.Dataset. This can be a view of the dataset.
+ ds_train_tensors: dictionary that maps mmdet keys to deeplake dataset tensor. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}.
+ If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+ keys that needs to be mapped are: `img` and "gt_semantic_seg". `img` and `gt_semantic_seg` are always required, if they not specified they
+ are always searched, if you specify in collect `gt_semantic_seg` then you need to either specify it in config or it will be searched based on
+ `segment_mask` htype.
+ ds_val: validation dataset of type dp.Dataset. This can be view of the dataset.
+ ds_val_tensors: dictionary that maps mmdet keys to deeplake dataset tensor. Example: {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories"}.
+ If this dictionary is not specified, these tensors will be searched automatically using htypes like "image" and "segment_mask".
+ keys that needs to be mapped are: `img` and "gt_semantic_seg". `img` and `gt_semantic_seg` are always required, if they not specified they
+ are always searched, if you specify in collect `gt_semantic_seg` then you need to either specify it in config or it will be searched based on
+ `segment_mask` htype.
+ evaluation: dictionary that contains all information needed for evaluation apart from data processing, like how often evaluation should be done and what metrics we want to use.
+ For instance, `evaluation = dict(interval=1, metric=['mIoU'])`
+ distributed: bool, whether ddp training should be started, by default `False`
+ timestamp: variable used in runner to make .log and .log.json filenames the same
+ meta: meta data used to build runner
+ validate: bool, whether validation should be conducted, by default `True`
+ """
+ check_unsupported_functionalities(cfg)
+
+ if not hasattr(cfg, "gpu_ids"):
+ cfg.gpu_ids = range(torch.cuda.device_count() if distributed else 1)
+ if distributed:
+ return torch.multiprocessing.spawn(
+ _train_segmentor,
+ args=(
+ model,
+ cfg,
+ ds_train,
+ ds_train_tensors,
+ ds_val,
+ ds_val_tensors,
+ distributed,
+ timestamp,
+ meta,
+ validate,
+ _get_free_port(),
+ ),
+ nprocs=len(cfg.gpu_ids),
+ )
+ _train_segmentor(
+ 0,
+ model,
+ cfg,
+ ds_train,
+ ds_train_tensors,
+ ds_val,
+ ds_val_tensors,
+ distributed,
+ timestamp,
+ meta,
+ validate,
+ )
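To make the cfg layout described in the docstring concrete, here is a minimal, hedged sketch of a training call. The dataset paths, token, tensor names, and pipeline entries are placeholders, and the usual mmseg keys (work_dir, lr_config, log_config, seed, device, etc.) are omitted for brevity:

```python
import mmcv

# Illustrative config only; paths, token and tensor names are placeholders.
cfg = mmcv.Config(
    dict(
        data=dict(
            samples_per_gpu=4,
            workers_per_gpu=2,
            train=dict(
                deeplake_path="hub://org/seg-train",
                deeplake_credentials=dict(token="<token>"),
                deeplake_tensors={"img": "images", "gt_semantic_seg": "masks"},
                pipeline=[dict(type="RandomFlip", prob=0.5)],
            ),
            val=dict(
                deeplake_path="hub://org/seg-val",
                deeplake_credentials=dict(token="<token>"),
                pipeline=[dict(type="RandomFlip", prob=0.0)],
            ),
        ),
        optimizer=dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=5e-4),
        optimizer_config=dict(),
        runner=dict(type="EpochBasedRunner", max_epochs=20),
        evaluation=dict(interval=1, metric=["mIoU"]),
    )
)

# `model` is assumed to be built beforehand, e.g. via mmseg's build_segmentor.
train_segmentor(model, cfg, distributed=False, validate=True)
```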
+
+
+def register_validation_hook_(
+ batch_size: int,
+ num_workers: int,
+ distributed: bool,
+ cfg: mmcv.ConfigDict,
+ ignore_index: int,
+ reduce_zero_label: bool,
+ train_persistent_workers: bool = False,
+ ds_val: Optional[dp.Dataset] = None,
+ ds_val_tensors=None,
+ runner=None,
+):
+ eval_cfg = cfg.get("evaluation", {})
+ val_dataloader_default_args = dict(
+ samples_per_gpu=batch_size,
+ workers_per_gpu=num_workers,
+ dist=distributed,
+ shuffle=False,
+ mode="val",
+ seed=cfg.seed,
+ num_gpus=len(cfg.gpu_ids),
+ ignore_index=ignore_index,
+ reduce_zero_label=reduce_zero_label,
+ )
+
+ val_dataloader_args = {
+ **cfg.data.val.get("deeplake_dataloader", {}),
+ **val_dataloader_default_args,
+ }
+
+ val_persistent_workers = val_dataloader_args.get("persistent_workers", False)
+ check_persistent_workers(train_persistent_workers, val_persistent_workers)
+
+ if val_dataloader_args.get("shuffle", False):
+ always_warn("shuffle argument for validation dataset will be ignored.")
+
+ if ds_val is None:
+ cfg_ds_val = cfg.data.get("val")
+ if not cfg_ds_val or not any(
+ cfg_ds_val.get(key) is not None
+ for key in ["deeplake_path", "deeplake_query"]
+ ):
+ raise ValidationDatasetMissingError()
+ ds_val = load_ds_from_cfg(cfg.data.val)
+ ds_val_tensors = cfg.data.val.get("deeplake_tensors", {})
+ else:
+ cfg_data = cfg.data.val.get("deeplake_path")
+ if cfg_data is not None:
+ always_warn(
+ "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_segmentor. The dataset input to train_segmentor will be used in the workflow."
+ )
+
+ if ds_val is None:
+ raise ValidationDatasetMissingError()
+
+ if ds_val_tensors:
+ val_images_tensor = ds_val_tensors["img"]
+ val_masks_tensor = ds_val_tensors.get("gt_semantic_seg")
+ else:
+ val_images_tensor = find_image_tensor(ds_val, mm_class="img")
+ val_masks_tensor = None
+ collection_keys = get_collect_keys(cfg)
+ if "gt_semantic_seg" in collection_keys:
+ val_masks_tensor = find_smask_tensor(ds_val, mm_class="gt_semantic_seg")
+
+ val_pipeline = get_pipeline(cfg, name="val", generic_name="test_pipeline")
+
+ val_dataloader = build_dataloader(
+ ds_val,
+ val_images_tensor,
+ val_masks_tensor,
+ pipeline=val_pipeline,
+ **val_dataloader_args,
+ )
+
+ eval_cfg["by_epoch"] = cfg.runner["type"] != "DeeplakeIterBasedRunner"
+ eval_cfg["pre_eval"] = False
+ eval_hook = EvalHook
+ if distributed:
+ eval_hook = DistEvalHook
+ # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
+ # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority="LOW")
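For reference, a hedged sketch of the cfg fragments this hook consumes (the evaluation schedule plus the optional per-split dataloader overrides); all values are illustrative:

```python
# Illustrative cfg fragments read by register_validation_hook_.
cfg.evaluation = dict(interval=1, metric=["mIoU"])
cfg.data.val = dict(
    deeplake_path="hub://org/seg-val",  # placeholder path
    deeplake_tensors={"img": "images", "gt_semantic_seg": "masks"},
    # Extra loader options; note the hook's computed defaults take
    # precedence for the validation split, and `shuffle` is ignored.
    deeplake_dataloader=dict(persistent_workers=False),
    pipeline=[dict(type="RandomFlip", prob=0.0)],
)
```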
+
+
+def _train_segmentor(
+ local_rank,
+ model,
+ cfg: mmcv.ConfigDict,
+ ds_train=None,
+ ds_train_tensors=None,
+ ds_val: Optional[dp.Dataset] = None,
+ ds_val_tensors=None,
+ distributed: bool = False,
+ timestamp=None,
+ meta=None,
+ validate: bool = True,
+ port=None,
+):
+ batch_size = cfg.data.get("samples_per_gpu", 256)
+ num_workers = cfg.data.get("workers_per_gpu", 1)
+
+ ignore_index = cfg.get("ignore_index", 255)
+ reduce_zero_label = cfg.get("reduce_zero_label", False)
+
+ if ds_train is None:
+ ds_train = load_ds_from_cfg(cfg.data.train)
+ ds_train_tensors = cfg.data.train.get("deeplake_tensors", {})
+ else:
+ cfg_data = cfg.data.train.get("deeplake_path")
+ if cfg_data:
+ always_warn(
+ "A Deep Lake dataset was specified in the cfg as well as in the dataset input to train_segmentor. The dataset input to train_segmentor will be used in the workflow."
+ )
+
+ if ds_train_tensors:
+ train_images_tensor = ds_train_tensors["img"]
+ train_masks_tensor = ds_train_tensors.get("gt_semantic_seg")
+ else:
+ train_images_tensor = find_image_tensor(ds_train, mm_class="img")
+ train_masks_tensor = None
+
+ collection_keys = get_collect_keys(cfg)
+ if "gt_semantic_seg" in collection_keys:
+ train_masks_tensor = find_smask_tensor(ds_train, mm_class="gt_semantic_seg")
+
+ model.CLASSES = ds_train[train_masks_tensor].metadata["class_names"]
+
+ logger = get_root_logger(log_level=cfg.log_level)
+ runner_type = "EpochBasedRunner" if "runner" not in cfg else cfg.runner["type"]
+
+ train_dataloader_default_args = dict(
+ samples_per_gpu=batch_size,
+ workers_per_gpu=num_workers,
+ # `num_gpus` will be ignored if distributed
+ num_gpus=len(cfg.gpu_ids),
+ dist=distributed,
+ seed=cfg.seed,
+ runner_type=runner_type,
+ ignore_index=ignore_index,
+ reduce_zero_label=reduce_zero_label,
+ )
+
+ train_loader_cfg = {
+ **train_dataloader_default_args,
+ **cfg.data.get("train_dataloader", {}),
+ **cfg.data.train.get("deeplake_dataloader", {}),
+ }
+
+    # put model on GPUs
+    if distributed:
+        # Sets the `find_unused_parameters` parameter in
+        # torch.nn.parallel.DistributedDataParallel.
+        find_unused_parameters = cfg.get("find_unused_parameters", False)
+ force_cudnn_initialization(cfg.gpu_ids[local_rank])
+ ddp_setup(local_rank, len(cfg.gpu_ids), port)
+ model = build_ddp(
+ model,
+ cfg.device,
+ device_ids=[cfg.gpu_ids[local_rank]],
+ broadcast_buffers=False,
+ find_unused_parameters=find_unused_parameters,
+ )
+ else:
+ model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)
+
+ train_pipeline = get_pipeline(cfg, name="train", generic_name="train_pipeline")
+
+ data_loader = build_dataloader(
+ ds_train,
+ train_images_tensor,
+ train_masks_tensor,
+ pipeline=train_pipeline,
+ **train_loader_cfg,
+ )
+
+ # build optimizer
+ optimizer = build_optimizer(model, cfg.optimizer)
+
+ # check runner
+ cfg.custom_imports = dict(
+ imports=["deeplake.integrations.mm.mm_runners"],
+ allow_failed_imports=False,
+ )
+ if cfg.runner.type == "IterBasedRunner":
+ cfg.runner.type = "DeeplakeIterBasedRunner"
+ elif cfg.runner.type == "EpochBasedRunner":
+ cfg.runner.type = "DeeplakeEpochBasedRunner"
+
+ runner = build_runner(
+ cfg.runner,
+ default_args=dict(
+ model=model,
+ optimizer=optimizer,
+ work_dir=cfg.work_dir,
+ logger=logger,
+ meta=meta,
+ force_cleanup=False,
+ ),
+ )
+
+ # an ugly workaround to make .log and .log.json filenames the same
+ runner.timestamp = timestamp
+
+ if distributed and "type" not in cfg.optimizer_config:
+ optimizer_config = OptimizerHook(**cfg.optimizer_config)
+ else:
+ optimizer_config = cfg.optimizer_config
+
+ # register hooks
+ runner.register_training_hooks(
+ cfg.lr_config,
+ optimizer_config,
+ cfg.checkpoint_config,
+ cfg.log_config,
+ cfg.get("momentum_config", None),
+ )
+
+ if distributed and isinstance(runner, EpochBasedRunner):
+ runner.register_hook(DistSamplerSeedHook())
+
+ # register eval hooks
+ if validate:
+ register_validation_hook_(
+ batch_size=batch_size,
+ num_workers=num_workers,
+ distributed=distributed,
+ train_persistent_workers=train_loader_cfg.get("persistent_workers", False),
+ cfg=cfg,
+ ignore_index=ignore_index,
+ reduce_zero_label=reduce_zero_label,
+ ds_val=ds_val,
+ ds_val_tensors=ds_val_tensors,
+ runner=runner,
+ )
+
+ # user-defined hooks
+ if cfg.get("custom_hooks", None):
+ custom_hooks = cfg.custom_hooks
+ assert isinstance(
+ custom_hooks, list
+ ), f"custom_hooks expect list type, but got {type(custom_hooks)}"
+ for hook_cfg in cfg.custom_hooks:
+ assert isinstance(hook_cfg, dict), (
+ "Each item in custom_hooks expects dict type, but got "
+ f"{type(hook_cfg)}"
+ )
+ hook_cfg = hook_cfg.copy()
+ priority = hook_cfg.pop("priority", "NORMAL")
+ hook = build_from_cfg(hook_cfg, HOOKS)
+ runner.register_hook(hook, priority=priority)
+
+ resume_from = None
+ if cfg.resume_from is None and cfg.get("auto_resume"):
+ resume_from = find_latest_checkpoint(cfg.work_dir)
+ if resume_from is not None:
+ cfg.resume_from = resume_from
+
+ if cfg.resume_from:
+ runner.resume(cfg.resume_from)
+ elif cfg.load_from:
+ runner.load_checkpoint(cfg.load_from)
+ runner.run([data_loader], cfg.workflow)
+
+
+def build_dataloader(
+ dataset: dp.Dataset,
+ images_tensor: str,
+ masks_tensor: Optional[str],
+ pipeline: List,
+ mode: str = "train",
+ **loader_config,
+):
+ persistent_workers = loader_config.get("persistent_workers", False)
+ _ = loader_config.get("ignore_index")
+ _ = loader_config.get("reduce_zero_label")
+ dist = loader_config["dist"]
+ seed = loader_config["seed"]
+ transform_fn = compose_transform(
+ images_tensor=images_tensor, masks_tensor=masks_tensor, pipeline=pipeline
+ )
+
+ num_workers = loader_config.get("num_workers")
+ pin_memory = loader_config.get("pin_memory", False)
+ if num_workers is None:
+ num_workers = loader_config["workers_per_gpu"]
+
+ shuffle = loader_config.get("shuffle", True)
+
+ tensors_dict = {
+ "images_tensor": images_tensor,
+ }
+ tensors = [images_tensor]
+ if masks_tensor is not None:
+ tensors.append(masks_tensor)
+ tensors_dict["masks_tensor"] = masks_tensor
+
+ batch_size = loader_config.get("batch_size")
+ drop_last = loader_config.get("drop_last", False)
+ if batch_size is None:
+ batch_size = loader_config["samples_per_gpu"]
+
+ collate_fn = partial(collate, samples_per_gpu=batch_size)
+
+    mmseg_ds = MMSegDataset(
+        dataset=dataset,
+        transform=transform_fn,
+        tensors_dict=tensors_dict,
+        tensors=tensors,
+        mode=mode,
+        num_gpus=loader_config["num_gpus"],
+        batch_size=batch_size,
+        # Forward the per-config values instead of silently discarding them,
+        # so MMSegDataset does not fall back to its defaults.
+        ignore_index=ignore_index,
+        reduce_zero_label=reduce_zero_label,
+    )
+
+    # get_dist_info() returns (0, 1) when torch.distributed is not initialized,
+    # and `rank` is also needed below for worker seeding, so fetch it here
+    # unconditionally instead of only on the distributed path.
+    rank, world_size = get_dist_info()
+    if dist:
+        sl = get_indexes(
+            dataset, rank=rank, num_replicas=world_size, drop_last=drop_last
+        )
+        dataset = dataset.query(
+            f"select * LIMIT {sl.stop - sl.start} OFFSET {sl.start}"
+        )
+
+ pytorch_ds = MMSegTorchDataset(dataset, transform=transform_fn)
+ pytorch_ds.mmseg_dataset = mmseg_ds
+
+ init_fn = (
+ partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
+ if seed is not None
+ else None
+ )
+
+ if digit_version(torch.__version__) >= digit_version("1.8.0"):
+ loader = DataLoader(
+ pytorch_ds,
+ batch_size=batch_size,
+ sampler=None,
+ num_workers=num_workers,
+ collate_fn=collate_fn,
+ pin_memory=pin_memory,
+ shuffle=shuffle,
+ worker_init_fn=init_fn,
+ drop_last=drop_last,
+ persistent_workers=persistent_workers,
+ )
+ else:
+ loader = DataLoader(
+ pytorch_ds,
+ batch_size=batch_size,
+ sampler=None,
+ num_workers=num_workers,
+ collate_fn=collate_fn,
+ pin_memory=pin_memory,
+ shuffle=shuffle,
+ worker_init_fn=init_fn,
+ drop_last=drop_last,
+ )
+
+ eval_fn = partial(mmseg_subiterable_dataset_eval, loader)
+ loader.dataset.evaluate = eval_fn
+
+ return loader
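A hedged sketch of calling build_dataloader directly (outside the runner); `ds` stands in for an open dp.Dataset and the tensor names are placeholders:

```python
# Direct use of build_dataloader under assumed inputs; `ds` is an open
# Deep Lake dataset and "images"/"masks" are placeholder tensor names.
loader = build_dataloader(
    ds,
    "images",
    "masks",                 # may be None when no mask tensor exists
    pipeline=[dict(type="RandomFlip", prob=0.5)],
    mode="train",
    samples_per_gpu=4,
    workers_per_gpu=2,
    num_gpus=1,
    dist=False,
    seed=None,
)
batch = next(iter(loader))   # batches are collated with mmcv's collate
```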
diff --git a/python/deeplake/integrations/mmseg/mmseg_dataset_.py b/python/deeplake/integrations/mmseg/mmseg_dataset_.py
new file mode 100644
index 0000000000..494f730b57
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/mmseg_dataset_.py
@@ -0,0 +1,239 @@
+from collections import OrderedDict
+import math
+import warnings
+
+import numpy as np
+
+from typing import Optional, Callable
+from torch.utils.data import Dataset
+from prettytable import PrettyTable  # type: ignore
+
+import mmcv
+from mmcv.utils import print_log
+from mmseg.core import eval_metrics, pre_eval_to_metrics
+
+from deeplake.integrations.mm.exceptions import InvalidImageError, InvalidSegmentError
+from deeplake.integrations.mm.upcast_array import upcast_array
+
+
+class MMSegTorchDataset(Dataset):
+ def __init__(
+ self,
+ dataset,
+ tensors=None,
+ transform: Optional[Callable] = None,
+ ) -> None:
+ super().__init__()
+ self.dataset = dataset
+ self.transform = transform
+ self.column_names = [col.name for col in self.dataset.schema.columns]
+ self.last_successful_index = -1
+
+ def __len__(self):
+ return len(self.dataset)
+
+ def __getitem__(self, idx):
+ while True:
+ try:
+ sample = self.dataset[idx]
+ result = None
+ if self.transform:
+ result = self.transform(sample)
+ else:
+ out = {}
+ for col in self.column_names:
+ out[col] = sample[col]
+ result = out
+ self.last_successful_index = idx
+ return result
+            except (InvalidImageError, InvalidSegmentError) as e:
+                print(f"Error processing data at index {idx}: {e}")
+                if self.last_successful_index == -1:
+                    # No good sample seen yet: advance to the next index
+                    # rather than recording an unverified one, which could
+                    # otherwise retry the same failing index forever.
+                    idx = (idx + 1) % len(self.dataset)
+                else:
+                    # Fall back to the last sample that loaded successfully.
+                    idx = self.last_successful_index
+                continue
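A short illustration of the fallback behaviour implemented above; `deeplake_ds` is a placeholder for an open dataset:

```python
# Sketch of the corruption-tolerant indexing; `deeplake_ds` is a placeholder.
ds = MMSegTorchDataset(deeplake_ds, transform=None)
sample = ds[3]  # returns row 3, or a substitute row if row 3 raises
                # InvalidImageError / InvalidSegmentError
```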
+
+
+class MMSegDataset(MMSegTorchDataset):
+ def __init__(
+ self,
+ *args,
+ tensors_dict,
+ mode="train",
+ num_gpus=1,
+ batch_size=1,
+ ignore_index=255,
+ reduce_zero_label=False,
+ **kwargs,
+ ):
+ super().__init__(*args, **kwargs)
+ self.mode = mode
+ self.num_gpus = num_gpus
+ self.batch_size = batch_size
+ self.ignore_index = ignore_index
+ self.reduce_zero_label = reduce_zero_label
+        # The masks tensor may be absent when no segmentation mask was mapped.
+        self.masks_tensor_name = tensors_dict.get("masks_tensor")
+        if self.mode in ("val", "test"):
+            self.CLASSES = self.get_classes(self.masks_tensor_name)[:]
+
+ def __len__(self):
+ if self.mode == "val":
+ per_gpu_length = math.floor(
+ len(self.dataset) / (self.batch_size * self.num_gpus)
+ )
+ total_length = per_gpu_length * self.num_gpus
+ return total_length
+ return super().__len__()
+
+ def _get_masks(self, masks_tensor):
+ if masks_tensor is None:
+ return []
+ return self.dataset[masks_tensor]
+
+ def get_classes(self, classes):
+ """Get class names of current dataset.
+
+ Args:
+            classes (str): Represents the name of the classes tensor. Overrides the CLASSES defined by the dataset.
+
+ Returns:
+ list[str]: Names of categories of the dataset.
+ """
+ return self.dataset[classes].metadata["class_names"]
+
+ def get_gt_seg_maps(self, efficient_test=None):
+ """Get ground truth segmentation maps for evaluation."""
+ if efficient_test is not None:
+ warnings.warn(
+ "DeprecationWarning: ``efficient_test`` has been deprecated "
+ "since MMSeg v0.16, the ``get_gt_seg_maps()`` is CPU memory "
+ "friendly by default. "
+ )
+
+ mask_col = self._get_masks(self.masks_tensor_name)
+ last_successful_index = -1
+ for idx in range(len(self)):
+ try:
+ result = upcast_array(mask_col[idx])
+ last_successful_index = idx
+ yield result
+ except Exception as e:
+ print(f"Error processing mask at index {idx}: {e}")
+ if last_successful_index == -1:
+ continue
+ else:
+ yield upcast_array(mask_col[last_successful_index])
+
+ def evaluate(self, results, metric="mIoU", logger=None, gt_seg_maps=None, **kwargs):
+ """Evaluate the dataset.
+
+ Args:
+ results (list[tuple[torch.Tensor]] | list[str]): per image pre_eval
+ results or predict segmentation map for computing evaluation
+ metric.
+ metric (str | list[str]): Metrics to be evaluated. 'mIoU',
+ 'mDice' and 'mFscore' are supported.
+ logger (logging.Logger | None | str): Logger used for printing
+ related information during evaluation. Default: None.
+ gt_seg_maps (generator[ndarray]): Custom gt seg maps as input,
+ used in ConcatDataset
+
+ ..
+ # noqa: DAR101
+
+ Raises:
+ KeyError: if a specified metric format is not supported
+
+ Returns:
+ dict[str, float]: Default metrics.
+ """
+
+ if self.num_gpus > 1:
+ results_ordered = []
+ for i in range(self.num_gpus):
+ results_ordered += results[i :: self.num_gpus]
+ results = results_ordered
+
+ if isinstance(metric, str):
+ metric = [metric]
+ allowed_metrics = ["mIoU", "mDice", "mFscore"]
+ if not set(metric).issubset(set(allowed_metrics)):
+ raise KeyError("metric {} is not supported".format(metric))
+
+ eval_results = {}
+ # test a list of files
+ if mmcv.is_list_of(results, np.ndarray) or mmcv.is_list_of(results, str):
+ if gt_seg_maps is None:
+ gt_seg_maps = self.get_gt_seg_maps()
+ num_classes = len(self.CLASSES)
+ ret_metrics = eval_metrics(
+ results,
+ gt_seg_maps,
+ num_classes,
+ self.ignore_index,
+ metric,
+ label_map=dict(),
+ reduce_zero_label=self.reduce_zero_label,
+ )
+ # test a list of pre_eval_results
+ else:
+ ret_metrics = pre_eval_to_metrics(results, metric)
+
+ # Because dataset.CLASSES is required for per-eval.
+ if self.CLASSES is None:
+ class_names = tuple(range(num_classes))
+ else:
+ class_names = self.CLASSES
+
+ # summary table
+ ret_metrics_summary = OrderedDict(
+ {
+ ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2)
+ for ret_metric, ret_metric_value in ret_metrics.items()
+ }
+ )
+
+ # each class table
+ ret_metrics.pop("aAcc", None)
+ ret_metrics_class = OrderedDict(
+ {
+ ret_metric: np.round(ret_metric_value * 100, 2)
+ for ret_metric, ret_metric_value in ret_metrics.items()
+ }
+ )
+ ret_metrics_class.update({"Class": class_names})
+ ret_metrics_class.move_to_end("Class", last=False)
+
+ # for logger
+ class_table_data = PrettyTable()
+ for key, val in ret_metrics_class.items():
+ class_table_data.add_column(key, val)
+
+ summary_table_data = PrettyTable()
+ for key, val in ret_metrics_summary.items():
+ if key == "aAcc":
+ summary_table_data.add_column(key, [val])
+ else:
+ summary_table_data.add_column("m" + key, [val])
+
+ print_log("per class results:", logger)
+ print_log("\n" + class_table_data.get_string(), logger=logger)
+ print_log("Summary:", logger)
+ print_log("\n" + summary_table_data.get_string(), logger=logger)
+
+ # each metric dict
+ for key, value in ret_metrics_summary.items():
+ if key == "aAcc":
+ eval_results[key] = value / 100.0
+ else:
+ eval_results["m" + key] = value / 100.0
+
+ ret_metrics_class.pop("Class", None)
+ for key, value in ret_metrics_class.items():
+ eval_results.update(
+ {
+ key + "." + str(name): value[idx] / 100.0
+ for idx, name in enumerate(class_names)
+ }
+ )
+
+ return eval_results
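For orientation, a hedged sketch of how this evaluate method is reached through the loader wiring in build_dataloader, where `dataset.evaluate` is bound to it via mmseg_subiterable_dataset_eval; `collect_predictions` is a placeholder for whatever produces per-image results:

```python
# `loader` comes from build_dataloader; `collect_predictions` is a placeholder.
results = collect_predictions(loader)
metrics = loader.dataset.evaluate(results, metric=["mIoU"])
print(metrics["mIoU"], metrics.get("aAcc"))
```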
diff --git a/python/deeplake/integrations/mmseg/test_.py b/python/deeplake/integrations/mmseg/test_.py
new file mode 100644
index 0000000000..fb6e1cc526
--- /dev/null
+++ b/python/deeplake/integrations/mmseg/test_.py
@@ -0,0 +1,245 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import warnings
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.engine import collect_results_cpu, collect_results_gpu
+from mmcv.image import tensor2imgs
+from mmcv.runner import get_dist_info
+
+
+def np2tmp(array, temp_file_name=None, tmpdir=None):
+ """Save ndarray to local numpy file.
+
+ Args:
+ array (ndarray): Ndarray to save.
+ temp_file_name (str): Numpy file name. If 'temp_file_name=None', this
+ function will generate a file name with tempfile.NamedTemporaryFile
+ to save ndarray. Default: None.
+ tmpdir (str): Temporary directory to save Ndarray files. Default: None.
+ Returns:
+ str: The numpy file name.
+ """
+
+ if temp_file_name is None:
+ temp_file_name = tempfile.NamedTemporaryFile(
+ suffix=".npy", delete=False, dir=tmpdir
+ ).name
+ np.save(temp_file_name, array)
+ return temp_file_name
+
+
+def single_gpu_test(
+ model,
+ data_loader,
+ show=False,
+ out_dir=None,
+ efficient_test=False,
+ opacity=0.5,
+ pre_eval=False,
+ format_only=False,
+ format_args={},
+):
+ """Test with single GPU by progressive mode.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (utils.data.Dataloader): Pytorch data loader.
+        show (bool): Whether to show results during inference. Default: False.
+        out_dir (str, optional): If specified, the output results will be
+            dumped into this directory.
+        efficient_test (bool): Whether to save the results as local numpy files
+            to reduce CPU memory usage during evaluation. Mutually exclusive
+            with pre_eval and format_results. Default: False.
+ opacity(float): Opacity of painted segmentation map.
+ Default 0.5.
+ Must be in (0, 1] range.
+ pre_eval (bool): Use dataset.pre_eval() function to generate
+ pre_results for metric evaluation. Mutually exclusive with
+ efficient_test and format_results. Default: False.
+ format_only (bool): Only format result for results commit.
+ Mutually exclusive with pre_eval and efficient_test.
+ Default: False.
+ format_args (dict): The args for format_results. Default: {}.
+ Returns:
+ list: list of evaluation pre-results or list of save file names.
+ """
+ if efficient_test:
+ warnings.warn(
+ "DeprecationWarning: ``efficient_test`` will be deprecated, the "
+ "evaluation is CPU memory friendly with pre_eval=True"
+ )
+ mmcv.mkdir_or_exist(".efficient_test")
+ # when none of them is set true, return segmentation results as
+ # a list of np.array.
+ assert [efficient_test, pre_eval, format_only].count(True) <= 1, (
+ "``efficient_test``, ``pre_eval`` and ``format_only`` are mutually "
+ "exclusive, only one of them could be true ."
+ )
+
+ model.eval()
+ results = []
+ dataset = data_loader.dataset.mmseg_dataset
+ prog_bar = mmcv.ProgressBar(len(dataset))
+    # The pipeline by which the data_loader retrieves samples from the dataset:
+    # sampler -> batch_sampler -> indices
+    # The indices are passed to the dataset_fetcher to get data from the dataset:
+    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
+    # We use batch_sampler to get the correct data indices.
+ loader_indices = data_loader.batch_sampler
+
+ for batch_indices, data in zip(loader_indices, data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, **data)
+
+ if show or out_dir:
+ img_tensor = data["img"][0]
+ img_metas = data["img_metas"][0].data[0]
+ imgs = tensor2imgs(img_tensor, **img_metas[0]["img_norm_cfg"])
+ assert len(imgs) == len(img_metas)
+
+ for img, img_meta in zip(imgs, img_metas):
+ h, w, _ = img_meta["img_shape"]
+ img_show = img[:h, :w, :]
+
+ ori_h, ori_w = img_meta["ori_shape"][:-1]
+ img_show = mmcv.imresize(img_show, (ori_w, ori_h))
+
+ if out_dir:
+ out_file = osp.join(out_dir, img_meta["ori_filename"])
+ else:
+ out_file = None
+
+ model.module.show_result(
+ img_show,
+ result,
+ palette=dataset.PALETTE,
+ show=show,
+ out_file=out_file,
+ opacity=opacity,
+ )
+
+ if efficient_test:
+ result = [np2tmp(_, tmpdir=".efficient_test") for _ in result]
+
+ if format_only:
+ result = dataset.format_results(
+ result, indices=batch_indices, **format_args
+ )
+ if pre_eval:
+ # TODO: adapt samples_per_gpu > 1.
+ # only samples_per_gpu=1 valid now
+ result = dataset.pre_eval(result, indices=batch_indices)
+ results.extend(result)
+ else:
+ results.extend(result)
+
+ batch_size = len(result)
+ for _ in range(batch_size):
+ prog_bar.update()
+
+ return results
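Because single_gpu_test is monkey-patched into mmseg.apis in mmseg_.py, a hedged end-to-end sketch looks like this; `model` (wrapped with build_dp) and `val_loader` (built with mode="val") are assumptions:

```python
import mmseg.apis

# `model` and `val_loader` are assumed to be built as in _train_segmentor.
results = mmseg.apis.single_gpu_test(model, val_loader, pre_eval=False)
metrics = val_loader.dataset.evaluate(results, metric="mIoU")
```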
+
+
+def multi_gpu_test(
+ model,
+ data_loader,
+ tmpdir=None,
+ gpu_collect=False,
+ efficient_test=False,
+ pre_eval=False,
+ format_only=False,
+ format_args={},
+):
+ """Test model with multiple gpus by progressive mode.
+
+ This method tests model with multiple gpus and collects the results
+ under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
+ it encodes results to gpu tensors and use gpu communication for results
+ collection. On cpu mode it saves the results on different gpus to 'tmpdir'
+ and collects them by the rank 0 worker.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (utils.data.Dataloader): Pytorch data loader.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode. The same path is used for efficient
+ test. Default: None.
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
+ Default: False.
+        efficient_test (bool): Whether to save the results as local numpy files
+            to reduce CPU memory usage during evaluation. Mutually exclusive
+            with pre_eval and format_results. Default: False.
+ pre_eval (bool): Use dataset.pre_eval() function to generate
+ pre_results for metric evaluation. Mutually exclusive with
+ efficient_test and format_results. Default: False.
+ format_only (bool): Only format result for results commit.
+ Mutually exclusive with pre_eval and efficient_test.
+ Default: False.
+ format_args (dict): The args for format_results. Default: {}.
+
+ Returns:
+ list: list of evaluation pre-results or list of save file names.
+ """
+ if efficient_test:
+ warnings.warn(
+ "DeprecationWarning: ``efficient_test`` will be deprecated, the "
+ "evaluation is CPU memory friendly with pre_eval=True"
+ )
+ mmcv.mkdir_or_exist(".efficient_test")
+ # when none of them is set true, return segmentation results as
+ # a list of np.array.
+ assert [efficient_test, pre_eval, format_only].count(True) <= 1, (
+ "``efficient_test``, ``pre_eval`` and ``format_only`` are mutually "
+ "exclusive, only one of them could be true ."
+ )
+
+ model.eval()
+ results = []
+ dataset = data_loader.dataset.mmseg_dataset
+    # The pipeline by which the data_loader retrieves samples from the dataset:
+    # sampler -> batch_sampler -> indices
+    # The indices are passed to the dataset_fetcher to get data from the dataset:
+    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
+    # We use batch_sampler to get the correct data indices.
+
+    # The batch_sampler is based on DistributedSampler, so the indices only
+    # point to data samples on the local machine.
+ loader_indices = data_loader.batch_sampler
+
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+
+ for batch_indices, data in zip(loader_indices, data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+
+ if efficient_test:
+ result = [np2tmp(_, tmpdir=".efficient_test") for _ in result]
+
+ if format_only:
+ result = dataset.format_results(
+ result, indices=batch_indices, **format_args
+ )
+ if pre_eval:
+ # TODO: adapt samples_per_gpu > 1.
+ # only samples_per_gpu=1 valid now
+ result = dataset.pre_eval(result, indices=batch_indices)
+
+ results.extend(result)
+
+ if rank == 0:
+ batch_size = len(result) * world_size
+ for _ in range(batch_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ if gpu_collect:
+ results = collect_results_gpu(results, len(dataset))
+ else:
+ results = collect_results_cpu(results, len(dataset), tmpdir)
+ return results
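And the distributed counterpart, again with assumed inputs; gpu_collect switches between GPU-side gathering and the tmpdir-based CPU path:

```python
import mmseg.apis
from mmcv.runner import get_dist_info

# `model` is assumed to be wrapped with build_ddp and `val_loader` built
# with dist=True; only rank 0 receives the gathered results.
results = mmseg.apis.multi_gpu_test(
    model, val_loader, tmpdir="/tmp/eval", gpu_collect=False
)
if get_dist_info()[0] == 0:
    metrics = val_loader.dataset.evaluate(results, metric="mIoU")
```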
diff --git a/python/deeplake/schemas.pyi b/python/deeplake/schemas.pyi
index 0e37e0c2c6..a18c5fc3ec 100644
--- a/python/deeplake/schemas.pyi
+++ b/python/deeplake/schemas.pyi
@@ -12,30 +12,39 @@ def TextEmbeddings(embedding_size: int, quantize: bool = False) -> SchemaTemplat
"""
A schema for storing embedded text from documents.
- - id (uint64)
- - chunk_index (uint16) Position of the text_chunk within the document
- - document_id (uint64) Unique identifier for the document the embedding came from
- - date_created (uint64) Timestamp the document was read
- - text_chunk (text) The text of the shard
- - embedding (dtype=float32, size=embedding_size) The embedding of the text
+ This schema includes the following fields:
+ - id (uint64): Unique identifier for each entry.
+ - chunk_index (uint16): Position of the text chunk within the document.
+ - document_id (uint64): Unique identifier for the document the embedding came from.
+ - date_created (uint64): Timestamp when the document was read.
+ - text_chunk (text): The text of the shard.
+ - embedding (dtype=float32, size=embedding_size): The embedding of the text.
Parameters:
- embedding_size: Size of the embeddings
- quantize: If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed
+ embedding_size: int
+ Size of the embeddings.
+ quantize: bool, optional
+ If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed. Default is False.
Examples:
+ Create a dataset with the standard schema:
```python
- # Create a dataset with the standard schema
- ds = deeplake.create("ds_path",
- schema=deeplake.schemas.TextEmbeddings(768).build())
-
- # Customize the schema before creating the dataset
- ds = deeplake.create("ds_path", schema=deeplake.schemas.TextEmbeddings(768)
- .rename("embedding", "text_embed")
- .add("author", types.Text())
- .build())
+ ds = deeplake.create("tmp://", schema=deeplake.schemas.TextEmbeddings(768))
```
+ Customize the schema before creating the dataset:
+ ```python
+ ds = deeplake.create("tmp://", schema=deeplake.schemas.TextEmbeddings(768)
+ .rename("embedding", "text_embed")
+ .add("author", types.Text()))
+ ```
+
+ Add a new field to the schema:
+ ```python
+ schema = deeplake.schemas.TextEmbeddings(768)
+ schema.add("language", types.Text())
+ ds = deeplake.create("tmp://", schema=schema)
+ ```
"""
...
@@ -49,59 +58,101 @@ def COCOImages(
"""
A schema for storing COCO-based image data.
- - id (uint64)
- - image (jpg image)
- - url (text)
- - year (uint8)
- - version (text)
- - description (text)
- - contributor (text)
- - date_created (uint64)
- - date_captured (uint64)
- - embedding (embedding)
- - license (text)
- - is_crowd (bool)
+ This schema includes the following fields:
+ - id (uint64): Unique identifier for each entry.
+ - image (jpg image): The image data.
+ - url (text): URL of the image.
+ - year (uint8): Year the image was captured.
+ - version (text): Version of the dataset.
+ - description (text): Description of the image.
+ - contributor (text): Contributor of the image.
+ - date_created (uint64): Timestamp when the image was created.
+ - date_captured (uint64): Timestamp when the image was captured.
+ - embedding (embedding): Embedding of the image.
+ - license (text): License information.
+ - is_crowd (bool): Whether the image contains a crowd.
If `objects` is true, the following fields are added:
- - objects_bbox (bounding box)
- - objects_classes (segment mask)
+ - objects_bbox (bounding box): Bounding boxes for objects.
+ - objects_classes (segment mask): Segment masks for objects.
If `keypoints` is true, the following fields are added:
- - keypoints_bbox (bounding box)
- - keypoints_classes (segment mask)
- - keypoints (2-dimensional array of uint32)
- - keypoints_skeleton (2-dimensional array of uint16)
+ - keypoints_bbox (bounding box): Bounding boxes for keypoints.
+ - keypoints_classes (segment mask): Segment masks for keypoints.
+ - keypoints (2-dimensional array of uint32): Keypoints data.
+ - keypoints_skeleton (2-dimensional array of uint16): Skeleton data for keypoints.
- if `stuffs` is true, the following fields are added:
- - stuffs_bbox (bounding boxes)
- - stuffs_classes (segment mask)
+ If `stuffs` is true, the following fields are added:
+ - stuffs_bbox (bounding boxes): Bounding boxes for stuffs.
+ - stuffs_classes (segment mask): Segment masks for stuffs.
Parameters:
- embedding_size: Size of the embeddings
- quantize: If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed
+ embedding_size: int
+ Size of the embeddings.
+ quantize: bool, optional
+ If true, quantize the embeddings to slightly decrease accuracy while greatly increasing query speed. Default is False.
+ objects: bool, optional
+ Whether to include object-related fields. Default is True.
+ keypoints: bool, optional
+ Whether to include keypoint-related fields. Default is False.
+ stuffs: bool, optional
+ Whether to include stuff-related fields. Default is False.
Examples:
+ Create a dataset with the standard schema:
```python
- # Create a dataset with the standard schema
- ds = deeplake.create("ds_path",
- schema=deeplake.schemas.COCOImages(768).build())
+ ds = deeplake.create("tmp://", schema=deeplake.schemas.COCOImages(768))
+ ```
- # Customize the schema before creating the dataset
- ds = deeplake.create("ds_path", schema=deeplake.schemas.COCOImages(768,
- objects=True, keypoints=True)
+ Customize the schema before creating the dataset:
+ ```python
+ ds = deeplake.create("tmp://", schema=deeplake.schemas.COCOImages(768, objects=True, keypoints=True)
.rename("embedding", "image_embed")
- .add("author", types.Text()).build())
+ .add("author", types.Text()))
```
+ Add a new field to the schema:
+ ```python
+ schema = deeplake.schemas.COCOImages(768)
+ schema.add("location", types.Text())
+ ds = deeplake.create("tmp://", schema=schema)
+ ```
"""
...
class SchemaTemplate:
"""
- A template that can be used for creating a new dataset with [deeplake.create][]
+ A template that can be used for creating a new dataset with [deeplake.create][].
+
+ This class allows you to define and customize the schema for your dataset.
+
+ Parameters:
+ schema: dict
+ A dictionary where the key is the column name and the value is the data type.
+
+ Methods:
+ add(name: str, dtype: deeplake._deeplake.types.DataType | str | deeplake._deeplake.types.Type) -> SchemaTemplate:
+ Adds a new column to the template.
+ remove(name: str) -> SchemaTemplate:
+ Removes a column from the template.
+ rename(old_name: str, new_name: str) -> SchemaTemplate:
+ Renames a column in the template.
+
+ Examples:
+ Create a new schema template, modify it, and create a dataset with the schema:
+ ```python
+ schema = deeplake.schemas.SchemaTemplate({
+ "id": types.UInt64(),
+ "text": types.Text(),
+ "embedding": types.Embedding(768)
+ })
+ schema.add("author", types.Text())
+ schema.remove("text")
+ schema.rename("embedding", "text_embedding")
+ ds = deeplake.create("tmp://", schema=schema)
+ ```
"""
- # Temporary workaround. Need to remove `deeplake._deeplake` from the return type.
def __init__(
self,
schema: dict[
@@ -109,9 +160,9 @@ class SchemaTemplate:
],
) -> None:
"""
- Constructs a new SchemaTemplate from the given dict
+ Constructs a new SchemaTemplate from the given dict.
"""
        ...
def add(
self,
@@ -119,20 +170,47 @@ class SchemaTemplate:
dtype: deeplake._deeplake.types.DataType | str | deeplake._deeplake.types.Type,
) -> SchemaTemplate:
"""
- Adds a new column to the template
+ Adds a new column to the template.
Parameters:
- name: The column name
- dtype: The column data type
+ name: str
+ The column name.
+ dtype: deeplake._deeplake.types.DataType | str | deeplake._deeplake.types.Type
+ The column data type.
+
+ Returns:
+ SchemaTemplate: The updated schema template.
+
+ Examples:
+ Add a new column to the schema:
+ ```python
+ schema = deeplake.schemas.SchemaTemplate({})
+ schema.add("author", types.Text())
+ ```
"""
...
def remove(self, name: str) -> SchemaTemplate:
"""
- Removes a column from the template
+ Removes a column from the template.
Parameters:
- name: The column name
+ name: str
+ The column name.
+
+ Returns:
+ SchemaTemplate: The updated schema template.
+
+ Examples:
+ Remove a column from the schema:
+ ```python
+ schema = deeplake.schemas.SchemaTemplate({
+ "id": types.UInt64(),
+ "text": types.Text(),
+ "embedding": types.Embedding(768)
+ })
+ schema.remove("text")
+ ```
"""
...
@@ -141,7 +219,23 @@ class SchemaTemplate:
Renames a column in the template.
Parameters:
- old_name: Existing column name
- new_name: New column name
+ old_name: str
+ Existing column name.
+ new_name: str
+ New column name.
+
+ Returns:
+ SchemaTemplate: The updated schema template.
+
+ Examples:
+ Rename a column in the schema:
+ ```python
+ schema = deeplake.schemas.SchemaTemplate({
+ "id": types.UInt64(),
+ "text": types.Text(),
+ "embedding": types.Embedding(768)
+ })
+ schema.rename("embedding", "text_embedding")
+ ```
"""
...
diff --git a/python/deeplake/tql.pyi b/python/deeplake/tql.pyi
index 97f0d1e43c..fdb09e88c1 100644
--- a/python/deeplake/tql.pyi
+++ b/python/deeplake/tql.pyi
@@ -16,6 +16,15 @@ def register_function(function: typing.Callable) -> None:
TQL interacts with Python functions through `numpy.ndarray`. The Python function
to be used in TQL should accept input arguments as numpy arrays and return numpy array.
+
+
Examples:
```python
def next_number(a):
diff --git a/python/deeplake/types.pyi b/python/deeplake/types.pyi
index 896b198fc9..a0603be9d8 100644
--- a/python/deeplake/types.pyi
+++ b/python/deeplake/types.pyi
@@ -306,13 +306,15 @@ def Array(dtype: DataType | str, dimensions: int, shape: list[int]) -> DataType:
DataType: A new array data type with the specified parameters.
Examples:
- Create a three-dimensional array, where each dimension can have any number of elements::
-
- ds.add_column("col1", types.Array("int32", dimensions=3))
+ Create a three-dimensional array, where each dimension can have any number of elements:
+ ```python
+ ds.add_column("col1", types.Array("int32", dimensions=3))
+ ```
- Create a three-dimensional array, where each dimension has a known size::
-
- ds.add_column("col2", types.Array(types.Float32(), shape=[50, 30, 768]))
+ Create a three-dimensional array, where each dimension has a known size:
+ ```python
+ ds.add_column("col2", types.Array(types.Float32(), shape=[50, 30, 768]))
+ ```
"""
...
@@ -324,10 +326,11 @@ def Bool() -> DataType:
DataType: A new boolean data type.
Examples:
- Create columns with boolean type::
-
- ds.add_column("col1", types.Bool)
- ds.add_column("col2", "bool")
+ Create columns with boolean type:
+ ```python
+ ds.add_column("col1", types.Bool)
+ ds.add_column("col2", "bool")
+ ```
"""
...
@@ -349,13 +352,14 @@ def Text(index_type: str | TextIndexType | None = None) -> Type:
Type: A new text data type.
Examples:
- Create text columns with different configurations::
-
- ds.add_column("col1", types.Text)
- ds.add_column("col2", "text")
- ds.add_column("col3", str)
- ds.add_column("col4", types.Text(index_type=types.Inverted))
- ds.add_column("col4", types.Text(index_type=types.BM25))
+ Create text columns with different configurations:
+ ```python
+ ds.add_column("col1", types.Text)
+ ds.add_column("col2", "text")
+ ds.add_column("col3", str)
+ ds.add_column("col4", types.Text(index_type=types.Inverted))
+ ds.add_column("col5", types.Text(index_type=types.BM25))
+ ```
"""
...
@@ -387,11 +391,12 @@ def Dict() -> Type:
:func:`deeplake.types.Struct` for a type that supports defining allowed keys.
Examples:
- Create and use a dictionary column::
-
- ds.add_column("col1", types.Dict)
- ds.append([{"col1": {"a": 1, "b": 2}}])
- ds.append([{"col1": {"b": 3, "c": 4}}])
+ Create and use a dictionary column:
+ ```python
+ ds.add_column("col1", types.Dict)
+ ds.append([{"col1": {"a": 1, "b": 2}}])
+ ds.append([{"col1": {"b": 3, "c": 4}}])
+ ```
"""
...
@@ -419,10 +424,11 @@ def Embedding(
:func:`deeplake.types.Array` for a multidimensional array.
Examples:
- Create embedding columns::
-
- ds.add_column("col1", types.Embedding(768))
- ds.add_column("col2", types.Embedding(768, quantization=types.QuantizationType.Binary))
+ Create embedding columns:
+ ```python
+ ds.add_column("col1", types.Embedding(768))
+ ds.add_column("col2", types.Embedding(768, quantization=types.QuantizationType.Binary))
+ ```
"""
...
@@ -434,9 +440,10 @@ def Float32() -> DataType:
DataType: A new 32-bit float data type.
Examples:
- Create a column with 32-bit float type::
-
- ds.add_column("col1", types.Float32)
+ Create a column with 32-bit float type:
+ ```python
+ ds.add_column("col1", types.Float32)
+ ```
"""
...
@@ -448,9 +455,10 @@ def Float64() -> DataType:
DataType: A new 64-bit float data type.
Examples:
- Create a column with 64-bit float type::
-
- ds.add_column("col1", types.Float64)
+ Create a column with 64-bit float type:
+ ```python
+ ds.add_column("col1", types.Float64)
+ ```
"""
...
@@ -462,9 +470,10 @@ def Int16() -> DataType:
DataType: A new 16-bit integer data type.
Examples:
- Create a column with 16-bit integer type::
-
- ds.add_column("col1", types.Int16)
+ Create a column with 16-bit integer type:
+ ```python
+ ds.add_column("col1", types.Int16)
+ ```
"""
...
@@ -476,9 +485,10 @@ def Int32() -> DataType:
DataType: A new 32-bit integer data type.
Examples:
- Create a column with 32-bit integer type::
-
- ds.add_column("col1", types.Int32)
+ Create a column with 32-bit integer type:
+ ```python
+ ds.add_column("col1", types.Int32)
+ ```
"""
...
@@ -490,9 +500,10 @@ def Int64() -> DataType:
DataType: A new 64-bit integer data type.
Examples:
- Create a column with 64-bit integer type::
-
- ds.add_column("col1", types.Int64)
+ Create a column with 64-bit integer type:
+ ```python
+ ds.add_column("col1", types.Int64)
+ ```
"""
...
@@ -504,9 +515,10 @@ def Int8() -> DataType:
DataType: A new 8-bit integer data type.
Examples:
- Create a column with 8-bit integer type::
-
- ds.add_column("col1", types.Int8)
+ Create a column with 8-bit integer type:
+ ```python
+ ds.add_column("col1", types.Int8)
+ ```
"""
...
@@ -526,9 +538,10 @@ def Sequence(nested_type: DataType | str | Type) -> Type:
Type: A new sequence data type.
Examples:
- Create a sequence of images::
-
- ds.add_column("col1", types.Sequence(types.Image(sample_
+ Create a sequence of images:
+ ```python
+ ds.add_column("col1", types.Sequence(types.Image(sample_compression="jpg")))
+ ```
"""
def Image(dtype: DataType | str = "uint8", sample_compression: str = "png") -> Type:
@@ -554,7 +567,7 @@ def Image(dtype: DataType | str = "uint8", sample_compression: str = "png") -> T
Examples:
```python
ds.add_column("col1", types.Image)
- ds.add_column("col1", types.Image(sample_compression="jpg"))
+ ds.add_column("col2", types.Image(sample_compression="jpg"))
```
"""
...
@@ -615,7 +628,7 @@ def BinaryMask(
Examples:
```python
ds.add_column("col1", types.BinaryMask(sample_compression="lz4"))
- ds.append(np.zeros((512, 512, 5), dtype="bool"))
+ ds.append([{"col1": np.zeros((512, 512, 5), dtype="bool")}])
```
"""
...
@@ -637,7 +650,7 @@ def SegmentMask(
Examples:
```python
ds.add_column("col1", types.SegmentMask(sample_compression="lz4"))
- ds.append("col1", np.zeros((512, 512)))
+ ds.append([{"col1": np.zeros((512, 512, 3))}])
```
"""
...
@@ -655,14 +668,12 @@ def Struct(fields: dict[str, DataType | str]) -> DataType:
```python
ds.add_column("col1", types.Struct({
"field1": types.Int16(),
- "field2": types.Text(),
+ "field2": "text",
}))
ds.append([{"col1": {"field1": 3, "field2": "a"}}])
print(ds[0]["col1"]["field1"]) # Output: 3
```
-
-
"""
...