Skip to content

Commit

Permalink
Add regex support to _use_legacy_file_structure() (#151)
Browse files Browse the repository at this point in the history
* add regex support with _use_legacy_file_structure()

* fix linter error

* Update tests/test_filesystem.py

Co-authored-by: Hagen Wierstorf <hwierstorf@audeering.com>

* fix linter errors

---------

Co-authored-by: Hagen Wierstorf <hwierstorf@audeering.com>
  • Loading branch information
frankenjoe and hagenw authored Oct 18, 2023
1 parent d529526 commit 6cc0190
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 24 deletions.
19 changes: 18 additions & 1 deletion audbackend/core/artifactory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
import typing

import artifactory
Expand Down Expand Up @@ -157,6 +158,7 @@ def __init__(
# see _use_legacy_file_structure()
self._legacy_extensions = []
self._legacy_file_structure = False
self._legacy_file_structure_regex = False

def _access(
self,
Expand Down Expand Up @@ -272,7 +274,12 @@ def _legacy_split_ext(
for custom_ext in self._legacy_extensions:
# check for custom extension
# ensure basename is not empty
if name[1:].endswith(f'.{custom_ext}'):
if self._legacy_file_structure_regex:
pattern = rf'\.({custom_ext})$'
match = re.search(pattern, name[1:])
if match:
ext = match.group(1)
elif name[1:].endswith(f'.{custom_ext}'):
ext = custom_ext
if ext is None:
# if no custom extension is found
Expand Down Expand Up @@ -431,6 +438,7 @@ def _use_legacy_file_structure(
self,
*,
extensions: typing.List[str] = None,
regex: bool = False,
):
r"""Use legacy file structure.
Expand Down Expand Up @@ -462,7 +470,16 @@ def _use_legacy_file_structure(
``'.../file.tar.gz'``
will then translate into
``'.../file/1.0.0/file-1.0.0.tar.gz'``.
If ``regex`` is set to ``True``,
the extensions are treated as regular expressions.
E.g.
with
``backend._use_legacy_file_structure(extensions=['\d+.tar.gz'],
regex=True)``
the backend path
``'.../file.99.tar.gz'``
will translate into
``'.../file/1.0.0/file-1.0.0.99.tar.gz'``.
"""
self._legacy_file_structure = True
self._legacy_extensions = extensions or []
self._legacy_file_structure_regex = regex
21 changes: 20 additions & 1 deletion audbackend/core/filesystem.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import os
import re
import shutil
import typing

Expand Down Expand Up @@ -30,6 +31,7 @@ def __init__(
# see _use_legacy_file_structure()
self._legacy_extensions = []
self._legacy_file_structure = False
self._legacy_file_structure_regex = False

def _access(
self,
Expand Down Expand Up @@ -135,7 +137,12 @@ def _legacy_split_ext(
for custom_ext in self._legacy_extensions:
# check for custom extension
# ensure basename is not empty
if name[1:].endswith(f'.{custom_ext}'):
if self._legacy_file_structure_regex:
pattern = rf'\.({custom_ext})$'
match = re.search(pattern, name[1:])
if match:
ext = match.group(1)
elif name[1:].endswith(f'.{custom_ext}'):
ext = custom_ext
if ext is None:
# if no custom extension is found
Expand Down Expand Up @@ -274,6 +281,7 @@ def _use_legacy_file_structure(
self,
*,
extensions: typing.List[str] = None,
regex: bool = False,
):
r"""Use legacy file structure.
Expand Down Expand Up @@ -305,7 +313,18 @@ def _use_legacy_file_structure(
``'.../file.tar.gz'``
will then translate into
``'.../file/1.0.0/file-1.0.0.tar.gz'``.
If ``regex`` is set to ``True``,
the extensions are treated as regular expressions.
E.g.
with
``backend._use_legacy_file_structure(extensions=['\d+.tar.gz'],
regex=True)``
the backend path
``'.../file.99.tar.gz'``
will translate into
``'.../file/1.0.0/file-1.0.0.99.tar.gz'``.
"""
self._legacy_file_structure = True
self._legacy_extensions = extensions or []
self._legacy_file_structure_regex = regex
77 changes: 66 additions & 11 deletions tests/test_artifactory.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,22 +136,77 @@ def test_errors(tmpdir, backend):
indirect=True,
)
@pytest.mark.parametrize(
'file, version, extensions, expected',
'file, version, extensions, regex, expected',
[
('/file.tar.gz', '1.0.0', None, 'file.tar/1.0.0/file.tar-1.0.0.gz'),
('/file.tar.gz', '1.0.0', [], 'file.tar/1.0.0/file.tar-1.0.0.gz'),
('/file.tar.gz', '1.0.0', ['tar.gz'], 'file/1.0.0/file-1.0.0.tar.gz'),
('/.tar.gz', '1.0.0', ['tar.gz'], '.tar/1.0.0/.tar-1.0.0.gz'),
('/tar.gz', '1.0.0', ['tar.gz'], 'tar/1.0.0/tar-1.0.0.gz'),
('/.tar.gz', '1.0.0', None, '.tar/1.0.0/.tar-1.0.0.gz'),
('/.tar', '1.0.0', None, '.tar/1.0.0/.tar-1.0.0'),
('/tar', '1.0.0', None, 'tar/1.0.0/tar-1.0.0'),
(
'/file.tar.gz', '1.0.0', None, False,
'file.tar/1.0.0/file.tar-1.0.0.gz',
),
(
'/file.tar.gz', '1.0.0', [], False,
'file.tar/1.0.0/file.tar-1.0.0.gz',
),
(
'/file.tar.gz', '1.0.0', ['tar.gz'], False,
'file/1.0.0/file-1.0.0.tar.gz',
),
(
'/.tar.gz', '1.0.0', ['tar.gz'], False,
'.tar/1.0.0/.tar-1.0.0.gz',
),
(
'/tar.gz', '1.0.0', ['tar.gz'], False,
'tar/1.0.0/tar-1.0.0.gz',
),
(
'/.tar.gz', '1.0.0', None, False,
'.tar/1.0.0/.tar-1.0.0.gz',
),
(
'/.tar', '1.0.0', None, False,
'.tar/1.0.0/.tar-1.0.0',
),
(
'/tar', '1.0.0', None, False,
'tar/1.0.0/tar-1.0.0',
),
# test regex
(
'/file.0.tar.gz', '1.0.0', [r'\d+.tar.gz'], False,
'file.0.tar/1.0.0/file.0.tar-1.0.0.gz',
),
(
'/file.0.tar.gz', '1.0.0', [r'\d+.tar.gz'], True,
'file/1.0.0/file-1.0.0.0.tar.gz',
),
(
'/file.99.tar.gz', '1.0.0', [r'\d+.tar.gz'], True,
'file/1.0.0/file-1.0.0.99.tar.gz',
),
(
'/file.prediction.99.tar.gz', '1.0.0',
[r'prediction.\d+.tar.gz', r'truth.tar.gz'], True,
'file/1.0.0/file-1.0.0.prediction.99.tar.gz',
),
(
'/file.truth.tar.gz', '1.0.0',
[r'prediction.\d+.tar.gz', r'truth.tar.gz'], True,
'file/1.0.0/file-1.0.0.truth.tar.gz',
),
(
'/file.99.tar.gz', '1.0.0', [r'(\d+.)?tar.gz'], True,
'file/1.0.0/file-1.0.0.99.tar.gz',
),
(
'/file.tar.gz', '1.0.0', [r'(\d+.)?tar.gz'], True,
'file/1.0.0/file-1.0.0.tar.gz',
),
]
)
def test_legacy_file_structure(tmpdir, backend, file, version, extensions,
expected):
regex, expected):

backend._use_legacy_file_structure(extensions=extensions)
backend._use_legacy_file_structure(extensions=extensions, regex=regex)

src_path = audeer.touch(audeer.path(tmpdir, 'tmp'))
backend.put_file(src_path, file, version)
Expand Down
77 changes: 66 additions & 11 deletions tests/test_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,79 @@ def test_get_file_interrupt(tmpdir, bad_file_system, backend):
indirect=True,
)
@pytest.mark.parametrize(
'file, version, extensions, expected',
'file, version, extensions, regex, expected',
[
('/file.tar.gz', '1.0.0', None, 'file.tar/1.0.0/file.tar-1.0.0.gz'),
('/file.tar.gz', '1.0.0', [], 'file.tar/1.0.0/file.tar-1.0.0.gz'),
('/file.tar.gz', '1.0.0', ['tar.gz'], 'file/1.0.0/file-1.0.0.tar.gz'),
('/.tar.gz', '1.0.0', ['tar.gz'], '.tar/1.0.0/.tar-1.0.0.gz'),
('/tar.gz', '1.0.0', ['tar.gz'], 'tar/1.0.0/tar-1.0.0.gz'),
('/.tar.gz', '1.0.0', None, '.tar/1.0.0/.tar-1.0.0.gz'),
('/.tar', '1.0.0', None, '.tar/1.0.0/.tar-1.0.0'),
('/tar', '1.0.0', None, 'tar/1.0.0/tar-1.0.0'),
(
'/file.tar.gz', '1.0.0', None, False,
'file.tar/1.0.0/file.tar-1.0.0.gz',
),
(
'/file.tar.gz', '1.0.0', [], False,
'file.tar/1.0.0/file.tar-1.0.0.gz',
),
(
'/file.tar.gz', '1.0.0', ['tar.gz'], False,
'file/1.0.0/file-1.0.0.tar.gz',
),
(
'/.tar.gz', '1.0.0', ['tar.gz'], False,
'.tar/1.0.0/.tar-1.0.0.gz',
),
(
'/tar.gz', '1.0.0', ['tar.gz'], False,
'tar/1.0.0/tar-1.0.0.gz',
),
(
'/.tar.gz', '1.0.0', None, False,
'.tar/1.0.0/.tar-1.0.0.gz',
),
(
'/.tar', '1.0.0', None, False,
'.tar/1.0.0/.tar-1.0.0',
),
(
'/tar', '1.0.0', None, False,
'tar/1.0.0/tar-1.0.0',
),
# test regex
(
'/file.0.tar.gz', '1.0.0', [r'\d+.tar.gz'], False,
'file.0.tar/1.0.0/file.0.tar-1.0.0.gz',
),
(
'/file.0.tar.gz', '1.0.0', [r'\d+.tar.gz'], True,
'file/1.0.0/file-1.0.0.0.tar.gz',
),
(
'/file.99.tar.gz', '1.0.0', [r'\d+.tar.gz'], True,
'file/1.0.0/file-1.0.0.99.tar.gz',
),
(
'/file.prediction.99.tar.gz', '1.0.0',
[r'prediction.\d+.tar.gz', r'truth.tar.gz'], True,
'file/1.0.0/file-1.0.0.prediction.99.tar.gz',
),
(
'/file.truth.tar.gz', '1.0.0',
[r'prediction.\d+.tar.gz', r'truth.tar.gz'], True,
'file/1.0.0/file-1.0.0.truth.tar.gz',
),
(
'/file.99.tar.gz', '1.0.0', [r'(\d+.)?tar.gz'], True,
'file/1.0.0/file-1.0.0.99.tar.gz',
),
(
'/file.tar.gz', '1.0.0', [r'(\d+.)?tar.gz'], True,
'file/1.0.0/file-1.0.0.tar.gz',
),
]
)
def test_legacy_file_structure(tmpdir, backend, file, version, extensions,
expected):
regex, expected):

expected = expected.replace('/', os.path.sep)

backend._use_legacy_file_structure(extensions=extensions)
backend._use_legacy_file_structure(extensions=extensions, regex=regex)

src_path = audeer.touch(audeer.path(tmpdir, 'tmp'))
backend.put_file(src_path, file, version)
Expand Down

0 comments on commit 6cc0190

Please sign in to comment.