From 28f34d3bd2ef718199f62943b169c8d689006e26 Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Wed, 27 Mar 2024 08:36:26 -0700 Subject: [PATCH] remove{suffix,prefix} replaces {l,r}strip in pytorch/data/torchdata/datapipes/iter/util/xzfileloader.py +1 Summary: `x.lstrip("string")` is equivalent to `re.sub(r"^[string]+", "", x)`: it strips any leading run of the characters in the argument, not the literal prefix. This means that `"fbcode/fbcode_file".lstrip("fbcode/")` returns `_file`. This can easily be an unintended behaviour! Instead, as of Python 3.9, we should use `str.removeprefix` or `str.removesuffix`. This diff makes one or more such fixes that we believe are likely to be bugs. Please double-check before approving. Reviewed By: zsol Differential Revision: D55386359 fbshipit-source-id: 662215018e025ed4aa14e0825950926b047b3dfc --- torchdata/datapipes/iter/util/xzfileloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchdata/datapipes/iter/util/xzfileloader.py b/torchdata/datapipes/iter/util/xzfileloader.py index 9224fc8de..528f7e4e3 100644 --- a/torchdata/datapipes/iter/util/xzfileloader.py +++ b/torchdata/datapipes/iter/util/xzfileloader.py @@ -53,7 +53,7 @@ def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]: pathname, data_stream = data try: extracted_fobj = lzma.open(data_stream, mode="rb") # type: ignore[call-overload] - new_pathname = pathname.rstrip(".xz") + new_pathname = pathname.removesuffix(".xz") yield new_pathname, StreamWrapper(extracted_fobj, data_stream, name=pathname) # type: ignore[misc] except Exception as e: warnings.warn(f"Unable to extract files from corrupted xz/lzma stream {pathname} due to: {e}, abort!")