From a64eba6ff43a0da26b7242d34d6d03fb2266bf97 Mon Sep 17 00:00:00 2001 From: Johannes Wagner Date: Thu, 18 Jan 2024 11:35:09 +0100 Subject: [PATCH] Remove version from implementation (#165) * Backend: expand path with version * remove version handling * remove version handling * TST: fix test * TST: fix test of docstring examples * DOC: fix SQL example * fix linter errors * TST: fix use backend path separator when expanding path * Backend: check path and version in _path_with_version() * Backend.latest_version(): remove path check * Backend._ls(): improve docstring --- audbackend/core/artifactory.py | 219 ++++---------------------- audbackend/core/backend.py | 275 ++++++++++++++++++++++++++------- audbackend/core/conftest.py | 2 - audbackend/core/filesystem.py | 204 +++--------------------- docs/legacy.rst | 6 +- docs/usage.rst | 61 +++----- tests/singlefolder.py | 47 ++---- tests/test_artifactory.py | 2 +- tests/test_filesystem.py | 2 +- 9 files changed, 310 insertions(+), 508 deletions(-) diff --git a/audbackend/core/artifactory.py b/audbackend/core/artifactory.py index d8acdbe6..a9f9163d 100644 --- a/audbackend/core/artifactory.py +++ b/audbackend/core/artifactory.py @@ -1,5 +1,4 @@ import os -import re import typing import artifactory @@ -154,12 +153,6 @@ def __init__( ) self._repo = path.find_repository_local(self.repository) - # to support legacy file structure - # see _use_legacy_file_structure() - self._legacy_extensions = [] - self._legacy_file_structure = False - self._legacy_file_structure_regex = False - def _access( self, ): @@ -170,10 +163,9 @@ def _access( def _checksum( self, path: str, - version: str, ) -> str: r"""MD5 checksum of file on backend.""" - path = self._path(path, version) + path = self._path(path) checksum = artifactory.ArtifactoryPath.stat(path).md5 return checksum @@ -214,10 +206,9 @@ def _create( def _date( self, path: str, - version: str, ) -> str: r"""Get last modification date of file on backend.""" - path = self._path(path, version) + path = self._path(path) date = path.stat().mtime date = utils.date_format(date) return date @@ -231,10 +222,14 @@ def _delete( def _exists( self, path: str, - version: str, ) -> bool: r"""Check if file exists on backend.""" - path = self._path(path, version) + path = self._expand(path) + path = _artifactory_path( + path, + self._username, + self._api_key, + ) return path.exists() def _expand( @@ -258,154 +253,52 @@ def _get_file( self, src_path: str, dst_path: str, - version: str, verbose: bool, ): r"""Get file from backend.""" - src_path = self._path(src_path, version) + src_path = self._path(src_path) _download(src_path, dst_path, verbose=verbose) - def _legacy_split_ext( - self, - name: str, - ) -> typing.Tuple[str, str]: - r"""Split name into basename and extension.""" - ext = None - for custom_ext in self._legacy_extensions: - # check for custom extension - # ensure basename is not empty - if self._legacy_file_structure_regex: - pattern = rf'\.({custom_ext})$' - match = re.search(pattern, name[1:]) - if match: - ext = match.group(1) - elif name[1:].endswith(f'.{custom_ext}'): - ext = custom_ext - if ext is None: - # if no custom extension is found - # use last string after dot - ext = audeer.file_extension(name) - - base = audeer.replace_file_extension(name, '', ext=ext) - - if ext: - ext = f'.{ext}' - - return base, ext - def _ls( self, path: str, - ) -> typing.List[typing.Tuple[str, str]]: - r"""List all files under (sub-)path.""" - if path.endswith('/'): # find files under sub-path - - path = self._expand(path) - path = _artifactory_path( - path, - self._username, - self._api_key, - ) - if not path.exists(): - utils.raise_file_not_found_error(str(path)) - - paths = [str(x) for x in path.glob("**/*") if x.is_file()] - - else: # find versions of path - - root, name = self.split(path) - - if self._legacy_file_structure: - base, _ = self._legacy_split_ext(name) - root = f'{self._expand(root)}{base}' - else: - root = self._expand(root) - - root = _artifactory_path( - root, - self._username, - self._api_key, - ) - vs = [os.path.basename(str(f)) for f in root if f.is_dir] - - # filter out other files with same root and version - paths = [str(self._path(path, v)) - for v in vs if self._exists(path, v)] - - if not paths: - utils.raise_file_not_found_error(path) - - # /// - # -> - # (//, ) - # - # or legacy: - # - # /////-. - # -> - # (//., ) - - result = [] - for p in paths: - - p = self._collapse(p) # remove host and repo - tokens = p.split('/') - - name = tokens[-1] - version = tokens[-2] - - if self._legacy_file_structure: - base = tokens[-3] - ext = name[len(base) + len(version) + 1:] - name = f'{base}{ext}' - path = self.sep.join(tokens[:-3]) - else: - path = self.sep.join(tokens[:-2]) - - path = self.sep + path - path = self.join(path, name) - - result.append((path, version)) - - return result + ) -> typing.List[str]: + r"""List all files under sub-path.""" + path = self._path(path) + path = _artifactory_path( + path, + self._username, + self._api_key, + ) + if not path.exists(): + utils.raise_file_not_found_error(str(path)) + + paths = [str(x) for x in path.glob("**/*") if x.is_file()] + paths = [self._collapse(path) for path in paths] + + return paths def _owner( self, path: str, - version: str, ) -> str: r"""Get owner of file on backend.""" - path = self._path(path, version) + path = self._path(path) owner = path.stat().modified_by return owner def _path( self, path: str, - version: str, ) -> artifactory.ArtifactoryPath: r"""Convert to backend path. - / - -> - //// - - or legacy: - - /. + -> - /////-. + // """ - root, name = self.split(path) - root = self._expand(root) - - if self._legacy_file_structure: - base, ext = self._legacy_split_ext(name) - path = f'{root}{base}/{version}/{base}-{version}{ext}' - else: - path = f'{root}{version}/{name}' - + path = self._expand(path) path = _artifactory_path( path, self._username, @@ -417,69 +310,17 @@ def _put_file( self, src_path: str, dst_path: str, - version: str, checksum: str, verbose: bool, ): r"""Put file to backend.""" - dst_path = self._path(dst_path, version) + dst_path = self._path(dst_path) _deploy(src_path, dst_path, checksum, verbose=verbose) def _remove_file( self, path: str, - version: str, ): r"""Remove file from backend.""" - path = self._path(path, version) + path = self._path(path) path.unlink() - - def _use_legacy_file_structure( - self, - *, - extensions: typing.List[str] = None, - regex: bool = False, - ): - r"""Use legacy file structure. - - Stores files under - ``'...///-.'`` - instead of - ``'...//'``. - By default, - the extension - ```` - is set to the string after the last dot. - I.e., - the backend path - ``'.../file.tar.gz'`` - will translate into - ``'.../file.tar/1.0.0/file.tar-1.0.0.gz'``. - However, - by passing a list with custom extensions - it is possible to overwrite - the default behavior - for certain extensions. - E.g., - with - ``backend._use_legacy_file_structure(extensions=['tar.gz'])`` - it is ensured that - ``'tar.gz'`` - will be recognized as an extension - and the backend path - ``'.../file.tar.gz'`` - will then translate into - ``'.../file/1.0.0/file-1.0.0.tar.gz'``. - E.g. - with - ``backend._use_legacy_file_structure(extensions=['\d+.tar.gz'], - regex=True)`` - the backend path - ``'.../file.99.tar.gz'`` - will translate into - ``'.../file/1.0.0/file-1.0.0.99.tar.gz'``. - - """ - self._legacy_file_structure = True - self._legacy_extensions = extensions or [] - self._legacy_file_structure_regex = regex diff --git a/audbackend/core/backend.py b/audbackend/core/backend.py index 4183b663..400972e4 100644 --- a/audbackend/core/backend.py +++ b/audbackend/core/backend.py @@ -1,11 +1,14 @@ +import errno import fnmatch import os +import re import tempfile import typing import audeer from audbackend.core import utils +from audbackend.core.errors import BackendError class Backend: @@ -28,6 +31,12 @@ def __init__( self.repository = repository r"""Repository name.""" + # to support legacy file structure + # see _use_legacy_file_structure() + self._legacy_extensions = [] + self._legacy_file_structure = False + self._legacy_file_structure_regex = False + def __repr__(self) -> str: # noqa: D105 name = f'{self.__class__.__module__}.{self.__class__.__name__}' return str((name, self.host, self.repository)) @@ -45,7 +54,6 @@ def _access( def _checksum( self, path: str, - version: str, ) -> str: # pragma: no cover r"""MD5 checksum of file on backend.""" raise NotImplementedError() @@ -77,13 +85,11 @@ def checksum( 'd41d8cd98f00b204e9800998ecf8427e' """ - path = utils.check_path(path) - version = utils.check_version(version) + path_with_version = self._path_with_version(path, version) return utils.call_function_on_backend( self._checksum, - path, - version, + path_with_version, ) def _create( @@ -99,7 +105,6 @@ def _create( def _date( self, path: str, - version: str, ) -> str: # pragma: no cover r"""Get date of file on backend. @@ -139,13 +144,11 @@ def date( '1991-02-20' """ - path = utils.check_path(path) - version = utils.check_version(version) + path_with_version = self._path_with_version(path, version) return utils.call_function_on_backend( self._date, - path, - version, + path_with_version, ) def _delete( @@ -157,7 +160,6 @@ def _delete( def _exists( self, path: str, - version: str, ) -> bool: # pragma: no cover r"""Check if file exists on backend.""" raise NotImplementedError() @@ -195,13 +197,11 @@ def exists( True """ - path = utils.check_path(path) - version = utils.check_version(version) + path_with_version = self._path_with_version(path, version) return utils.call_function_on_backend( self._exists, - path, - version, + path_with_version, suppress_backend_errors=suppress_backend_errors, fallback_return_value=False, ) @@ -280,7 +280,6 @@ def _get_file( self, src_path: str, dst_path: str, - version: str, verbose: bool, ): # pragma: no cover r"""Get file from backend.""" @@ -338,8 +337,7 @@ def get_file( True """ - src_path = utils.check_path(src_path) - version = utils.check_version(version) + src_path_with_version = self._path_with_version(src_path, version) dst_path = audeer.path(dst_path) if os.path.isdir(dst_path): @@ -365,9 +363,8 @@ def get_file( tmp_path = audeer.path(tmp, '~') utils.call_function_on_backend( self._get_file, - src_path, + src_path_with_version, tmp_path, - version, verbose, ) audeer.move_file(tmp_path, dst_path) @@ -435,18 +432,48 @@ def latest_version( '2.0.0' """ - path = utils.check_path(path) vs = self.versions(path) return vs[-1] + def _legacy_split_ext( + self, + name: str, + ) -> typing.Tuple[str, str]: + r"""Split name into basename and extension.""" + ext = None + for custom_ext in self._legacy_extensions: + # check for custom extension + # ensure basename is not empty + if self._legacy_file_structure_regex: + pattern = rf'\.({custom_ext})$' + match = re.search(pattern, name[1:]) + if match: + ext = match.group(1) + elif name[1:].endswith(f'.{custom_ext}'): + ext = custom_ext + if ext is None: + # if no custom extension is found + # use last string after dot + ext = audeer.file_extension(name) + + base = audeer.replace_file_extension(name, '', ext=ext) + + if ext: + ext = f'.{ext}' + + return base, ext + def _ls( self, path: str, - ) -> typing.List[typing.Tuple[str, str]]: # pragma: no cover - r"""List all files under (sub-)path. + ) -> typing.List[str]: # pragma: no cover + r"""List all files under sub-path. - * If path does not exist an error should be raised - * If path ends on `/` it is a sub-path + If ``path`` is ``'/'`` and no files exist on the repository, + an empty list should be returned + Otherwise, + if ``path`` does not exist or no files are found under ``path``, + an error should be raised. """ raise NotImplementedError() @@ -515,27 +542,95 @@ def ls( """ # noqa: E501 path = utils.check_path(path) - paths = utils.call_function_on_backend( - self._ls, - path, - suppress_backend_errors=suppress_backend_errors, - fallback_return_value=[], - ) + + if path.endswith('/'): # find files under sub-path + + paths = utils.call_function_on_backend( + self._ls, + path, + suppress_backend_errors=suppress_backend_errors, + fallback_return_value=[], + ) + + else: # find versions of path + + root, file = self.split(path) + paths = utils.call_function_on_backend( + self._ls, + root, + suppress_backend_errors=suppress_backend_errors, + fallback_return_value=[], + ) + + # filter for '/root/version/file' + if self._legacy_file_structure: + depth = root.count('/') + 2 + name, ext = self._legacy_split_ext(file) + match = re.compile(rf'{name}-\d+\.\d+.\d+{ext}') + paths = [ + p for p in paths + if ( + p.count('/') == depth and + match.match(os.path.basename(p)) + ) + ] + else: + depth = root.count('/') + 1 + paths = [ + p for p in paths + if ( + p.count('/') == depth and + os.path.basename(p) == file + ) + ] + + if not paths and not suppress_backend_errors: + # since the backend does no longer raise an error + # if the path does not exist + # we have to do it + ex = FileNotFoundError( + errno.ENOENT, + os.strerror(errno.ENOENT), + path, + ) + raise BackendError(ex) + if not paths: - return paths + return [] + + paths_and_versions = [] + for p in paths: + + tokens = p.split(self.sep) + + name = tokens[-1] + version = tokens[-2] + + if self._legacy_file_structure: + base = tokens[-3] + ext = name[len(base) + len(version) + 1:] + name = f'{base}{ext}' + path = self.sep.join(tokens[:-3]) + else: + path = self.sep.join(tokens[:-2]) + + path = self.sep + path + path = self.join(path, name) + + paths_and_versions.append((path, version)) - paths = sorted(paths) + paths_and_versions = sorted(paths_and_versions) if pattern: - paths = [ - (p, v) for p, v in paths + paths_and_versions = [ + (p, v) for p, v in paths_and_versions if fnmatch.fnmatch(os.path.basename(p), pattern) ] if latest_version: # d[path] = ['1.0.0', '2.0.0'] d = {} - for p, v in paths: + for p, v in paths_and_versions: if p not in d: d[p] = [] d[p].append(v) @@ -543,14 +638,13 @@ def ls( for p, vs in d.items(): d[p] = audeer.sort_versions(vs)[-1] # [(path, '2.0.0')] - paths = [(p, v) for p, v in d.items()] + paths_and_versions = [(p, v) for p, v in d.items()] - return paths + return paths_and_versions def _owner( self, path: str, - version: str, ) -> str: # pragma: no cover r"""Get owner of file on backend. @@ -590,15 +684,44 @@ def owner( 'doctest' """ - path = utils.check_path(path) - version = utils.check_version(version) + path_with_version = self._path_with_version(path, version) return utils.call_function_on_backend( self._owner, - path, - version, + path_with_version, ) + def _path_with_version( + self, + path: str, + version: str, + ) -> str: + r"""Convert to versioned path. + + / + -> + // + + or legacy: + + / + -> + ///- + + """ + path = utils.check_path(path) + version = utils.check_version(version) + + root, name = self.split(path) + + if self._legacy_file_structure: + base, ext = self._legacy_split_ext(name) + path = self.join(root, base, version, f'{base}-{version}{ext}') + else: + path = self.join(root, version, name) + + return path + def put_archive( self, src_root: str, @@ -684,7 +807,6 @@ def _put_file( self, src_path: str, dst_path: str, - version: str, checksum: str, verbose: bool, ): # pragma: no cover @@ -731,8 +853,7 @@ def put_file( True """ - dst_path = utils.check_path(dst_path) - version = utils.check_version(version) + dst_path_with_version = self._path_with_version(dst_path, version) if not os.path.exists(src_path): utils.raise_file_not_found_error(src_path) @@ -749,8 +870,7 @@ def put_file( utils.call_function_on_backend( self._put_file, src_path, - dst_path, - version, + dst_path_with_version, checksum, verbose, ) @@ -758,7 +878,6 @@ def put_file( def _remove_file( self, path: str, - version: str, ): # pragma: no cover r"""Remove file from backend.""" raise NotImplementedError() @@ -790,13 +909,11 @@ def remove_file( False """ - path = utils.check_path(path) - version = utils.check_version(version) + path_with_version = self._path_with_version(path, version) utils.call_function_on_backend( self._remove_file, - path, - version, + path_with_version, ) @property @@ -870,3 +987,55 @@ def versions( paths = self.ls(path, suppress_backend_errors=suppress_backend_errors) vs = [v for _, v in paths] return vs + + def _use_legacy_file_structure( + self, + *, + extensions: typing.List[str] = None, + regex: bool = False, + ): + r"""Use legacy file structure. + + Stores files under + ``'...///-.'`` + instead of + ``'...//'``. + By default, + the extension + ```` + is set to the string after the last dot. + I.e., + the backend path + ``'.../file.tar.gz'`` + will translate into + ``'.../file.tar/1.0.0/file.tar-1.0.0.gz'``. + However, + by passing a list with custom extensions + it is possible to overwrite + the default behavior + for certain extensions. + E.g., + with + ``backend._use_legacy_file_structure(extensions=['tar.gz'])`` + it is ensured that + ``'tar.gz'`` + will be recognized as an extension + and the backend path + ``'.../file.tar.gz'`` + will then translate into + ``'.../file/1.0.0/file-1.0.0.tar.gz'``. + If ``regex`` is set to ``True``, + the extensions are treated as regular expressions. + E.g. + with + ``backend._use_legacy_file_structure(extensions=['\d+.tar.gz'], + regex=True)`` + the backend path + ``'.../file.99.tar.gz'`` + will translate into + ``'.../file/1.0.0/file-1.0.0.99.tar.gz'``. + + """ + self._legacy_file_structure = True + self._legacy_extensions = extensions or [] + self._legacy_file_structure_regex = regex diff --git a/audbackend/core/conftest.py b/audbackend/core/conftest.py index b9347a77..4492f46d 100644 --- a/audbackend/core/conftest.py +++ b/audbackend/core/conftest.py @@ -18,7 +18,6 @@ def __repr__(self) -> str: def _date( self, path: str, - version: str, ) -> str: date = datetime.datetime(1991, 2, 20) date = audbackend.core.utils.date_format(date) @@ -27,7 +26,6 @@ def _date( def _owner( self, path: str, - version: str, ) -> str: return 'doctest' diff --git a/audbackend/core/filesystem.py b/audbackend/core/filesystem.py index 52ed8319..7f5f3ba2 100644 --- a/audbackend/core/filesystem.py +++ b/audbackend/core/filesystem.py @@ -1,6 +1,5 @@ import datetime import os -import re import shutil import typing @@ -27,12 +26,6 @@ def __init__( self._root = audeer.path(host, repository) + os.sep - # to support legacy file structure - # see _use_legacy_file_structure() - self._legacy_extensions = [] - self._legacy_file_structure = False - self._legacy_file_structure_regex = False - def _access( self, ): @@ -43,10 +36,9 @@ def _access( def _checksum( self, path: str, - version: str, ) -> str: r"""MD5 checksum of file on backend.""" - path = self._path(path, version) + path = self._expand(path) return audeer.md5(path) def _collapse( @@ -76,10 +68,9 @@ def _create( def _date( self, path: str, - version: str, ) -> str: r"""Get last modification date of file on backend.""" - path = self._path(path, version) + path = self._expand(path) date = os.path.getmtime(path) date = datetime.datetime.fromtimestamp(date) date = utils.date_format(date) @@ -94,10 +85,9 @@ def _delete( def _exists( self, path: str, - version: str, ) -> bool: r"""Check if file exists on backend.""" - path = self._path(path, version) + path = self._expand(path) return os.path.exists(path) def _expand( @@ -121,210 +111,54 @@ def _get_file( self, src_path: str, dst_path: str, - version: str, verbose: bool, ): r"""Get file from backend.""" - src_path = self._path(src_path, version) + src_path = self._expand(src_path) shutil.copy(src_path, dst_path) - def _legacy_split_ext( - self, - name: str, - ) -> typing.Tuple[str, str]: - r"""Split name into basename and extension.""" - ext = None - for custom_ext in self._legacy_extensions: - # check for custom extension - # ensure basename is not empty - if self._legacy_file_structure_regex: - pattern = rf'\.({custom_ext})$' - match = re.search(pattern, name[1:]) - if match: - ext = match.group(1) - elif name[1:].endswith(f'.{custom_ext}'): - ext = custom_ext - if ext is None: - # if no custom extension is found - # use last string after dot - ext = audeer.file_extension(name) - - base = audeer.replace_file_extension(name, '', ext=ext) - - if ext: - ext = f'.{ext}' - - return base, ext - def _ls( self, path: str, - ) -> typing.List[typing.Tuple[str, str]]: - r"""List all files under (sub-)path.""" - if path.endswith('/'): # find files under sub-path - - path = self._expand(path) - if not os.path.exists(path): - utils.raise_file_not_found_error(path) - paths = audeer.list_file_names( - path, - recursive=True, - hidden=True, - ) - - else: # find versions of path - - root, name = self.split(path) - - if self._legacy_file_structure: - base, _ = self._legacy_split_ext(name) - root = f'{self._expand(root)}{base}' - else: - root = self._expand(root) - - vs = audeer.list_dir_names( - root, - basenames=True, - hidden=True, - ) - - # filter out other files with same root and version - paths = [self._path(path, v) for v in vs if self._exists(path, v)] - - if not paths: - utils.raise_file_not_found_error(path) - - # //// - # -> - # (//, ) - - result = [] - for p in paths: - - p = self._collapse(p) # remove host and repo - tokens = p.split(self.sep) - - name = tokens[-1] - version = tokens[-2] - - if self._legacy_file_structure: - base = tokens[-3] - ext = name[len(base) + len(version) + 1:] - name = f'{base}{ext}' - path = self.sep.join(tokens[:-3]) - else: - path = self.sep.join(tokens[:-2]) - - path = self.sep + path - path = self.join(path, name) - - result.append((path, version)) + ) -> typing.List[str]: + r"""List all files under sub-path.""" + path = self._expand(path) + if not os.path.exists(path): + utils.raise_file_not_found_error(path) + paths = audeer.list_file_names( + path, + recursive=True, + hidden=True, + ) + paths = [self._collapse(path) for path in paths] - return result + return paths def _owner( self, path: str, - version: str, ) -> str: r"""Get owner of file on backend.""" - path = self._path(path, version) + path = self._expand(path) owner = utils.file_owner(path) return owner - def _path( - self, - path: str, - version: str, - ) -> str: - r"""Convert to backend path. - - /// - -> - //// - - """ - root, name = self.split(path) - root = self._expand(root) - - if self._legacy_file_structure: - base, ext = self._legacy_split_ext(name) - path = os.path.join(root, base, version, f'{base}-{version}{ext}') - else: - path = os.path.join(root, version, name) - - return path - def _put_file( self, src_path: str, dst_path: str, - version: str, checksum: str, verbose: bool, ): r"""Put file to backend.""" - dst_path = self._path(dst_path, version) + dst_path = self._expand(dst_path) audeer.mkdir(os.path.dirname(dst_path)) shutil.copy(src_path, dst_path) def _remove_file( self, path: str, - version: str, ): r"""Remove file from backend.""" - path = self._path(path, version) + path = self._expand(path) os.remove(path) - - def _use_legacy_file_structure( - self, - *, - extensions: typing.List[str] = None, - regex: bool = False, - ): - r"""Use legacy file structure. - - Stores files under - ``'...///-.'`` - instead of - ``'...//'``. - By default, - the extension - ```` - is set to the string after the last dot. - I.e., - the backend path - ``'.../file.tar.gz'`` - will translate into - ``'.../file.tar/1.0.0/file.tar-1.0.0.gz'``. - However, - by passing a list with custom extensions - it is possible to overwrite - the default behavior - for certain extensions. - E.g., - with - ``backend._use_legacy_file_structure(extensions=['tar.gz'])`` - it is ensured that - ``'tar.gz'`` - will be recognized as an extension - and the backend path - ``'.../file.tar.gz'`` - will then translate into - ``'.../file/1.0.0/file-1.0.0.tar.gz'``. - If ``regex`` is set to ``True``, - the extensions are treated as regular expressions. - E.g. - with - ``backend._use_legacy_file_structure(extensions=['\d+.tar.gz'], - regex=True)`` - the backend path - ``'.../file.99.tar.gz'`` - will translate into - ``'.../file/1.0.0/file-1.0.0.99.tar.gz'``. - - """ - self._legacy_file_structure = True - self._legacy_extensions = extensions or [] - self._legacy_file_structure_regex = regex diff --git a/docs/legacy.rst b/docs/legacy.rst index 372bf706..4dec8784 100644 --- a/docs/legacy.rst +++ b/docs/legacy.rst @@ -16,11 +16,7 @@ Legacy backends =============== The file structure on the backend -has changed for -:class:`audbackend.FileSystem` -and :class:`audbackend.Artifactory` -in version 1.0.0 -of :mod:`audbackend`. +has changed with version 1.0.0. Before, a file ``/sub/file.txt`` diff --git a/docs/usage.rst b/docs/usage.rst index 04d46104..185b833a 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -396,7 +396,6 @@ stored on our backend: * ``content``: the binary content * ``date``: the date when the file was added * ``owner``: the owner of the file -* ``version``: the version of the file .. jupyter-execute:: @@ -423,8 +422,7 @@ stored on our backend: content BLOB NOT NULL, date TEXT NOT NULL, owner TEXT NOT NULL, - version TEXT NOT NULL, - PRIMARY KEY (path, version) + PRIMARY KEY (path) ); ''' with self._db as db: @@ -470,14 +468,13 @@ if a file exists. def _exists( self, path: str, - version: str, ) -> bool: with self._db as db: query = f''' SELECT EXISTS ( SELECT 1 FROM data - WHERE path="{path}" AND version="{version}" + WHERE path="{path}" ); ''' result = db.execute(query).fetchone()[0] == 1 @@ -499,7 +496,6 @@ a file to our backend. self, src_path: str, dst_path: str, - version: str, checksum: str, verbose: bool, ): @@ -507,12 +503,12 @@ a file to our backend. with open(src_path, 'rb') as file: content = file.read() query = ''' - INSERT INTO data (path, checksum, content, date, owner, version) - VALUES (?, ?, ?, ?, ?, ?) + INSERT INTO data (path, checksum, content, date, owner) + VALUES (?, ?, ?, ?, ?) ''' owner = getpass.getuser() date = datetime.datetime.today().strftime('%Y-%m-%d') - data = (dst_path, checksum, content, date, owner, version) + data = (dst_path, checksum, content, date, owner) db.execute(query, data) @@ -537,13 +533,12 @@ to access its meta information. def _checksum( self, path: str, - version: str, ) -> str: with self._db as db: query = f''' SELECT checksum FROM data - WHERE path="{path}" AND version="{version}" + WHERE path="{path}" ''' checksum = db.execute(query).fetchone()[0] return checksum @@ -556,13 +551,12 @@ to access its meta information. def _date( self, path: str, - version: str, ) -> str: with self._db as db: query = f''' SELECT date FROM data - WHERE path="{path}" AND version="{version}" + WHERE path="{path}" ''' date = db.execute(query).fetchone()[0] return date @@ -575,13 +569,12 @@ to access its meta information. def _owner( self, path: str, - version: str, ) -> str: with self._db as db: query = f''' SELECT owner FROM data - WHERE path="{path}" AND version="{version}" + WHERE path="{path}" ''' owner = db.execute(query).fetchone()[0] return owner @@ -601,14 +594,13 @@ from the backend. self, src_path: str, dst_path: str, - version: str, verbose: bool, ): with self._db as db: query = f''' SELECT content FROM data - WHERE path="{src_path}" AND version="{version}" + WHERE path="{src_path}" ''' content = db.execute(query).fetchone()[0] with open(dst_path, 'wb') as fp: @@ -637,28 +629,19 @@ we provide a listing method. def _ls( self, path: str, - ) -> typing.List[typing.Tuple[str, str]]: + ) -> typing.List[str]: with self._db as db: - if path.endswith('/'): - # path is sub-path; - # list all files and versions under sub-path - query = f''' - SELECT path, version - FROM data - WHERE path - LIKE ? || "%" - ''' - ls = db.execute(query, [path]).fetchall() - else: - # path is file - # list all versions of file - query = f''' - SELECT path, version - FROM data - WHERE path="{path}" - ''' - ls = db.execute(query).fetchall() + + # list all files and versions under sub-path + query = f''' + SELECT path + FROM data + WHERE path + LIKE ? || "%" + ''' + ls = db.execute(query, [path]).fetchall() + ls = [x[0] for x in ls] if not ls and not path == '/': # path has to exists if not root @@ -667,6 +650,7 @@ we provide a listing method. os.strerror(errno.ENOENT), path, ) + return ls @@ -691,13 +675,12 @@ requires another method. def _remove_file( self, path: str, - version: str, ): with self._db as db: query = f''' DELETE FROM data - WHERE path="{path}" AND version="{version}" + WHERE path="{path}" ''' db.execute(query) diff --git a/tests/singlefolder.py b/tests/singlefolder.py index 084d930d..a85ae0d2 100644 --- a/tests/singlefolder.py +++ b/tests/singlefolder.py @@ -18,8 +18,7 @@ class SingleFolder(audbackend.Backend): A serialized dictionary stores the dependency between backend path and the names. - It also stores the version - and checksum for every file. + It also stores checksum for every file. """ class Map: @@ -75,10 +74,9 @@ def _access( def _checksum( self, path: str, - version: str, ) -> str: with self.Map(self._path, self._lock) as m: - return m[path][version][1] + return m[path][1] def _create( self, @@ -91,10 +89,9 @@ def _create( def _date( self, path: str, - version: str, ) -> str: with self.Map(self._path, self._lock) as m: - p = m[path][version][0] + p = m[path][0] date = os.path.getmtime(p) date = datetime.datetime.fromtimestamp(date) date = audbackend.core.utils.date_format(date) @@ -111,40 +108,31 @@ def _delete( def _exists( self, path: str, - version: str, ) -> bool: with self.Map(self._path, self._lock) as m: - return path in m and version in m[path] + return path in m def _get_file( self, src_path: str, dst_path: str, - version: str, verbose: bool, ): with self.Map(self._path, self._lock) as m: - shutil.copy(m[src_path][version][0], dst_path) + shutil.copy(m[src_path][0], dst_path) def _ls( self, path: str, - ) -> typing.List[typing.Tuple[str, str]]: + ) -> typing.List[str]: with self.Map(self._path, self._lock) as m: ls = [] - if path.endswith('/'): - for p in m: - if p.startswith(path): - for v in m[p]: - ls.append((p, v)) - else: - for p in m: - if p == path: - for v in m[p]: - ls.append((p, v)) + for p in m: + if p.startswith(path): + ls.append(p) if not ls and not path == '/': raise audbackend.core.utils.raise_file_not_found_error(path) @@ -154,17 +142,15 @@ def _ls( def _owner( self, path: str, - version: str, ): with self.Map(self._path, self._lock) as m: - p = m[path][version][0] + p = m[path][0] return audbackend.core.utils.file_owner(p) def _put_file( self, src_path: str, dst_path: str, - version: str, checksum: str, verbose: bool, ): @@ -172,21 +158,16 @@ def _put_file( if dst_path not in m: m[dst_path] = {} - - if version not in m[dst_path]: p = audeer.path(self._root, audeer.uid()[:8]) - m[dst_path][version] = (p, checksum) + m[dst_path] = (p, checksum) - shutil.copy(src_path, m[dst_path][version][0]) + shutil.copy(src_path, m[dst_path][0]) def _remove_file( self, path: str, - version: str, ): with self.Map(self._path, self._lock) as m: - os.remove(m[path][version][0]) - m[path].pop(version) - if not m[path]: - m.pop(path) + os.remove(m[path][0]) + m.pop(path) diff --git a/tests/test_artifactory.py b/tests/test_artifactory.py index 1aea0a9b..367ae8e9 100644 --- a/tests/test_artifactory.py +++ b/tests/test_artifactory.py @@ -212,6 +212,6 @@ def test_legacy_file_structure(tmpdir, backend, file, version, extensions, backend.put_file(src_path, file, version) url = f'{str(backend._repo.path)}{expected}' - assert str(backend._path(file, version)) == url + assert backend._expand(backend._path_with_version(file, version)) == url assert backend.ls(file) == [(file, version)] assert backend.ls() == [(file, version)] diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 150ccb46..228afb0c 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -139,6 +139,6 @@ def test_legacy_file_structure(tmpdir, backend, file, version, extensions, backend.put_file(src_path, file, version) path = os.path.join(backend._root, expected) - assert str(backend._path(file, version)) == path + assert backend._expand(backend._path_with_version(file, version)) == path assert backend.ls(file) == [(file, version)] assert backend.ls() == [(file, version)]