Skip to content

Commit

Permalink
Revert "Add audbackend.checksum() (#245)"
Browse files Browse the repository at this point in the history
This reverts commit 1d0c713.
  • Loading branch information
hagenw committed Nov 22, 2024
1 parent e05f1cd commit 43d3ecc
Show file tree
Hide file tree
Showing 9 changed files with 13 additions and 180 deletions.
1 change: 0 additions & 1 deletion audbackend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from audbackend.core.backend.base import Base as Backend # legacy
from audbackend.core.backend.filesystem import FileSystem # legacy
from audbackend.core.errors import BackendError
from audbackend.core.utils import checksum
from audbackend.core.repository import Repository

# Import optional backends (legacy)
Expand Down
8 changes: 4 additions & 4 deletions audbackend/core/backend/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ def _assert_equal_checksum(
"""
if path_is_local:
checksum = utils.checksum(path)
checksum = audeer.md5(path)
else:
checksum = self.checksum(path)

if path_ref_is_local:
checksum_ref = utils.checksum(path_ref)
checksum_ref = audeer.md5(path_ref)
else:
checksum_ref = self.checksum(path_ref)

Expand Down Expand Up @@ -569,7 +569,7 @@ def get_file(
msg = f"Permission denied: '{dst_path}'"
raise PermissionError(msg)

if not os.path.exists(dst_path) or utils.checksum(dst_path) != self.checksum(
if not os.path.exists(dst_path) or audeer.md5(dst_path) != self.checksum(
src_path
):
# get file to a temporary directory first,
Expand Down Expand Up @@ -1042,7 +1042,7 @@ def put_file(
elif os.path.isdir(src_path):
raise utils.raise_is_a_directory(src_path)

checksum = utils.checksum(src_path)
checksum = audeer.md5(src_path)

# skip if file with same checksum already exists
if not self.exists(dst_path) or self.checksum(dst_path) != checksum:
Expand Down
2 changes: 1 addition & 1 deletion audbackend/core/backend/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def _checksum(
) -> str:
r"""MD5 checksum of file on backend."""
path = self._expand(path)
return utils.checksum(path)
return audeer.md5(path)

def _collapse(
self,
Expand Down
2 changes: 1 addition & 1 deletion audbackend/core/interface/versioned.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def checksum(
Examples:
>>> file = "src.txt"
>>> import audeer
>>> audbackend.checksum(file)
>>> audeer.md5(file)
'd41d8cd98f00b204e9800998ecf8427e'
>>> interface.put_file(file, "/file.txt", "1.0.0")
>>> interface.checksum("/file.txt", "1.0.0")
Expand Down
58 changes: 0 additions & 58 deletions audbackend/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import re
import time

import audeer

from audbackend.core.errors import BackendError


Expand Down Expand Up @@ -106,62 +104,6 @@ def check_version(version: str) -> str:
return version


def checksum(file: str) -> str:
    r"""Return the checksum of a local file.

    Backends call this function
    to compute the checksum of local files.
    For most files the result of :func:`audeer.md5` is returned.
    Parquet files are the exception:
    if their schema metadata contains a ``"hash"`` entry
    and pyarrow_ is installed,
    the value of that entry is returned instead.

    .. _pyarrow: https://arrow.apache.org/docs/python/index.html

    Args:
        file: file path with extension

    Returns:
        MD5 checksum of file

    Raises:
        FileNotFoundError: if ``file`` does not exist

    Examples:
        >>> checksum("src.txt")
        'd41d8cd98f00b204e9800998ecf8427e'
        >>> import audformat
        >>> import pandas as pd
        >>> import pyarrow as pa
        >>> import pyarrow.parquet as pq
        >>> df = pd.DataFrame([0, 1], columns=["a"])
        >>> hash = audformat.utils.hash(df, strict=True)
        >>> hash
        '9021a9b6e1e696ba9de4fe29346319b2'
        >>> parquet_file = audeer.path("file.parquet")
        >>> table = pa.Table.from_pandas(df)
        >>> table = table.replace_schema_metadata({"hash": hash})
        >>> pq.write_table(table, parquet_file, compression="snappy")
        >>> checksum(parquet_file)
        '9021a9b6e1e696ba9de4fe29346319b2'

    """
    # Everything that is not a parquet file is hashed by content
    if audeer.file_extension(file) != "parquet":
        return audeer.md5(file)

    try:
        # pyarrow is an optional dependency,
        # hence it is imported lazily
        import pyarrow.parquet as parquet

        schema_metadata = parquet.read_schema(file).metadata
        # ``metadata`` might be ``None`` if the schema carries no metadata
        if schema_metadata and b"hash" in schema_metadata:
            return schema_metadata[b"hash"].decode()
    except ModuleNotFoundError:
        # Without pyarrow fall back to the MD5 sum of the file content
        pass

    return audeer.md5(file)


def date_format(date: datetime.datetime) -> str:
    r"""Format a date as ``YYYY-MM-DD`` string.

    Args:
        date: date object to format

    Returns:
        date formatted with the pattern ``"%Y-%m-%d"``

    """
    # A format spec on a datetime object is forwarded to ``strftime()``
    return f"{date:%Y-%m-%d}"

Expand Down
5 changes: 4 additions & 1 deletion docs/api-src/audbackend.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,7 @@ and functions are available.

BackendError
Repository
checksum
access
create
delete
register
1 change: 0 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
autodoc_inherit_docstrings = False # disable docstring inheritance
intersphinx_mapping = {
"audeer": ("https://audeering.github.io/audeer/", None),
"audformat": ("https://audeering.github.io/audformat/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"python": ("https://docs.python.org/3/", None),
}
Expand Down
4 changes: 3 additions & 1 deletion tests/bad_file_system.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import audeer

import audbackend


Expand All @@ -14,7 +16,7 @@ def put_file(
verbose: bool = False,
):
r"""Put file on backend."""
checksum = audbackend.checksum(src_path)
checksum = audeer.md5(src_path)
audbackend.core.utils.call_function_on_backend(
self._put_file,
src_path,
Expand Down
112 changes: 0 additions & 112 deletions tests/test_utils.py

This file was deleted.

0 comments on commit 43d3ecc

Please sign in to comment.