Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

possible bug in keras.utils.Sequence #21151

Open
pure-rgb opened this issue Apr 10, 2025 · 1 comment
Open

possible bug in keras.utils.Sequence #21151

pure-rgb opened this issue Apr 10, 2025 · 1 comment
Assignees
Labels

Comments

@pure-rgb
Copy link

Code

from tensorflow import keras
import numpy as np

X_test_data = [np.random.rand(20, 20, 1).astype(np.float32) for _ in range(5)]
y_test_data = [np.random.randint(0, 2, (20, 20,  1)).astype(np.float32) for _ in range(5)]

class SimpleDataLoader(keras.utils.Sequence):
    def __init__(
        self,
        x_data,
        y_data,
        batch_size=1,
        shuffle=True,
    ):
        self.x_data = x_data
        self.y_data = y_data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.x_data) / self.batch_size))

    def __getitem__(self, index):
        print(f"__getitem__ called with index: {index})")
        indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        x_batch = [self.x_data[k] for k in indices]
        y_batch = [self.y_data[k] for k in indices]

        if not x_batch:
            print(f"Warning: x_batch is empty for index {index})")
        if not y_batch:
            print(f"Warning: y_batch is empty for index {index})")

        X = np.stack(x_batch, axis=0)
        y = np.stack(y_batch, axis=0)

        return X, y

    def on_epoch_end(self):
        self.indices = np.arange(len(self.x_data))
        if self.shuffle:
            np.random.shuffle(self.indices)

batch_size = 1

vl = SimpleDataLoader(
    x_data=X_test_data,
    y_data=y_test_data,
    batch_size=1,
    shuffle=False,
)

print(len(vl))
for x, y in vl:
    print(f"Shape: x={x.shape}, y={y.shape}")

The length of the input is simply 5, if for validation, it should stop after iterating over the dataset but it keeps trying more and ended up error.

5
__getitem__ called with index: 0)
Shape: x=(1, 20, 20, 1), y=(1, 20, 20, 1)
__getitem__ called with index: 1)
Shape: x=(1, 20, 20, 1), y=(1, 20, 20, 1)
__getitem__ called with index: 2)
Shape: x=(1, 20, 20, 1), y=(1, 20, 20, 1)
__getitem__ called with index: 3)
Shape: x=(1, 20, 20, 1), y=(1, 20, 20, 1)
__getitem__ called with index: 4)
Shape: x=(1, 20, 20, 1), y=(1, 20, 20, 1)
__getitem__ called with index: 5)
Warning: x_batch is empty for index 5)
Warning: y_batch is empty for index 5)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_31/1498463694.py in <cell line: 0>()
     54 
     55 print(len(vl))
---> 56 for x, y in vl:
     57     print(f"Shape: x={x.shape}, y={y.shape}")

/tmp/ipykernel_31/1498463694.py in __getitem__(self, index)
     34             print(f"Warning: y_batch is empty for index {index})")
     35 
---> 36         X = np.stack(x_batch, axis=0)
     37         y = np.stack(y_batch, axis=0)
     38 

/usr/local/lib/python3.11/dist-packages/numpy/core/shape_base.py in stack(arrays, axis, out, dtype, casting)
    443     arrays = [asanyarray(arr) for arr in arrays]
    444     if not arrays:
--> 445         raise ValueError('need at least one array to stack')
    446 
    447     shapes = {arr.shape for arr in arrays}

ValueError: need at least one array to stack
@pure-rgb
Copy link
Author

pure-rgb commented Apr 10, 2025

Raised before.

#19994
#20001

Why the hell hasn’t this been fixed yet?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

3 participants