Add memory-mapped support for Kinetics-skeleton data converter (#516)

* Add mmap kinetics data generation
* Update docs
* Fix PEP8 formatting
* Fix PEP8 [redefinition]

---------

Co-authored-by: Nikolaos Passalis <passalis@users.noreply.github.com>
opendr-eu · Jan 29, 2025 · 2759cee · 2759cee
1 parent 2ce6f6b
commit 2759cee
Show file tree

Hide file tree

Showing 2 changed files with 87 additions and 12 deletions.
diff --git a/docs/reference/skeleton-based-action-recognition.md b/docs/reference/skeleton-based-action-recognition.md
@@ -17,6 +17,12 @@ You need to specify the path of the downloaded data as `--data_path` and the pat
 ntu_samples_with_missing_skeletons.txt provides the NTU-RGB+D sample indices which don't contain any skeleton.
 You need to specify the path of this file with --ignored_sample_path.
 
+If you have limited RAM, you can enable memory mapping in `kinetics_gendata.py` by adding the `--use_mmap` flag and, optionally, specifying the chunk size, i.e. the number of samples written between flushes to disk (e.g., `--chunk_size 256`). The default chunk size is 128.
+
+```bash
+python3 kinetics_gendata.py --data_path ./data/kinetics_raw_skeletons --out_folder ./data/preprocessed_kinetics_skeletons --use_mmap --chunk_size 256
+```
+
 ### Class SpatioTemporalGCNLearner
 Bases: `engine.learners.Learner`
 

diff --git a/...pendr/perception/skeleton_based_action_recognition/algorithm/datasets/kinetics_gendata.py b/...pendr/perception/skeleton_based_action_recognition/algorithm/datasets/kinetics_gendata.py
@@ -10,7 +10,7 @@
 from tqdm import tqdm
 import pandas
 from pathlib import Path
-
+from numpy.lib.format import open_memmap
 
 KINETICS400_CLASSES = pandas.read_csv(Path(__file__).parent /
                                       'kinetics400_classes.csv', verbose=True, index_col=0).to_dict()["name"]
@@ -44,23 +44,92 @@ def gendata(data_path, label_path,
     np.save(data_out_path, fp)
 
 
+def gendata_mmap(data_path, label_path,
+                 data_out_path, label_out_path,
+                 num_person_in=5,  # observe the first 5 persons
+                 num_person_out=2,  # then choose 2 persons with the highest score
+                 max_frame=300,  # size of the output's frame axis; also passed to the feeder as window_size
+                 chunk_size=128):  # number of samples written between fp.flush() calls
+    """Write feeder samples into a memory-mapped .npy file, flushing once per chunk."""
+    feeder = KineticsFeeder(
+        data_path=data_path,
+        label_path=label_path,
+        num_person_in=num_person_in,
+        num_person_out=num_person_out,
+        window_size=max_frame
+    )
+
+    sample_name = feeder.sample_name
+    num_samples = len(sample_name)
+    sample_label = [None] * num_samples  # pre-sized so labels can be stored by index
+
+    fp_shape = (num_samples, 3, max_frame, 18, num_person_out)  # on-disk array shape; 18 is presumably the joint count - confirm
+    fp_dtype = np.float32
+
+    # create the .npy file on disk (mode 'w+' overwrites an existing file) and map it for writing
+    fp = open_memmap(
+        data_out_path,
+        mode='w+',
+        dtype=fp_dtype,
+        shape=fp_shape
+    )
+
+    for start_idx in range(0, num_samples, chunk_size):
+        end_idx = min(start_idx + chunk_size, num_samples)
+        current_size = end_idx - start_idx  # the last chunk may be shorter than chunk_size
+
+        for i in tqdm(range(current_size), desc=f"Chunk {start_idx}-{end_idx}", leave=False):
+            # i is only the offset inside this chunk; the feeder and the output are indexed globally
+            idx_global = start_idx + i
+            data, label = feeder[idx_global]
+            T = data.shape[1]  # length of this sample's frame axis
+
+            fp[idx_global, :, :T, :, :] = data  # frames past T keep the file's initial contents (presumably zeros - confirm)
+            sample_label[idx_global] = label
+
+        fp.flush()  # push this chunk's writes to disk before starting the next chunk
+    del fp  # release the memmap reference before writing the labels
+
+    with open(label_out_path, 'wb') as f:
+        pickle.dump((sample_name, sample_label), f)  # labels are index-aligned with the rows of the data file
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
-        description='Kinetics-skeleton Data Converter.')
-    parser.add_argument(
-        '--data_path', default='./data/kinetics_raw')
-    parser.add_argument(
-        '--out_folder', default='./data/kinetics')
+        description='Kinetics-skeleton Data Converter.'
+    )
+    parser.add_argument('--data_path', default='./data/kinetics_raw')
+    parser.add_argument('--out_folder', default='./data/kinetics')
+    parser.add_argument('--use_mmap', action='store_true',
+                        help="Whether to use memory-mapped numpy arrays.")
+    parser.add_argument('--chunk_size', type=int, default=128,
+                        help="Number of samples processed in each chunk.")
     arg = parser.parse_args()
 
     part = ['val', 'train']
     for p in part:
-        print('kinetics ', p)
+        print('Kinetics', p)
         if not os.path.exists(arg.out_folder):
             os.makedirs(arg.out_folder)
-        data_path = '{}/kinetics_{}'.format(arg.data_path, p)
-        label_path = '{}/kinetics_{}_label.json'.format(arg.data_path, p)
-        data_out_path = '{}/{}_data_joint.npy'.format(arg.out_folder, p)
-        label_out_path = '{}/{}_label.pkl'.format(arg.out_folder, p)
+        data_path = f'{arg.data_path}/kinetics_{p}'
+        label_path = f'{arg.data_path}/kinetics_{p}_label.json'
+        data_out_path = f'{arg.out_folder}/{p}_data_joint.npy'
+        label_out_path = f'{arg.out_folder}/{p}_label.pkl'
 
-        gendata(data_path, label_path, data_out_path, label_out_path)
+        if not arg.use_mmap:  # default: the pre-existing gendata() converter
+            gendata(
+                data_path, label_path,
+                data_out_path, label_out_path,
+                num_person_in=5,
+                num_person_out=2,
+                max_frame=300
+            )
+        else:  # --use_mmap given: write through a memory-mapped .npy file, chunk by chunk
+            gendata_mmap(
+                data_path, label_path,
+                data_out_path, label_out_path,
+                num_person_in=5,
+                num_person_out=2,
+                max_frame=300,
+                chunk_size=arg.chunk_size
+            )