From a07ae58d58821b8210a9f9da845dcc44b0b4407c Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Fri, 12 May 2023 00:31:13 -0500 Subject: [PATCH] Various changes (#96) * Improve some error messages by including the string value of the path so it is clear what failed. * Cleaned up notebook for clarity and brevity, also added table of fsspec supported filesystems. * Fixed bug causing incorrect rglob results (top level glob matches were not included). * GitHub Actions: Only run on PRs and pushes to main and tagged commits, to fix duplicate runs. * GitHub Actions: Don't fail fast, so that multiple failures can be reviewed after each run. * GitHub Actions: Run on "3.11" rather than "3.11-dev". --------- Co-authored-by: Andreas Poehlmann --- .github/workflows/python.yml | 7 +- README.md | 2 +- notebooks/examples.ipynb | 443 ++++++++++------------ upath/__init__.py | 3 +- upath/core.py | 106 +++--- upath/errors.py | 1 + upath/implementations/cloud.py | 16 +- upath/implementations/hdfs.py | 2 +- upath/registry.py | 6 +- upath/tests/cases.py | 30 +- upath/tests/implementations/test_azure.py | 4 + upath/tests/implementations/test_http.py | 8 + 12 files changed, 328 insertions(+), 300 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 2c49782e..170968e9 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -1,13 +1,18 @@ name: Python package on: push: + branches: + - main + tags: + - v*.*.* pull_request: jobs: tests: runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10", "3.11-dev"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] os: [ubuntu-latest, windows-latest] steps: - uses: actions/checkout@v3 diff --git a/README.md b/README.md index 8ff9f7cc..9b25b739 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Universal Pathlib is a python library that aims to extend Python's built-in [`pa ### Pypi ```bash -pip install universal_pathlib +python -m pip 
install universal_pathlib ``` ### conda diff --git a/notebooks/examples.ipynb b/notebooks/examples.ipynb index de0fc200..b17ebd05 100644 --- a/notebooks/examples.ipynb +++ b/notebooks/examples.ipynb @@ -13,7 +13,13 @@ }, "outputs": [], "source": [ - "from upath import UPath" + "import pathlib\n", + "import warnings\n", + "from tempfile import NamedTemporaryFile\n", + "\n", + "from upath import UPath\n", + "\n", + "warnings.filterwarnings(action=\"ignore\", message=\"UPath .*\", module=\"upath.core\")" ] }, { @@ -25,9 +31,9 @@ } }, "source": [ - "### local filesystem\n", + "# Local Filesystem\n", "\n", - "If you give a local path, `UPath` defaults to `pathlib.PosixPath` or `pathlib.WindowsPath`" + "If you give a local path, UPath defaults to `pathlib.PosixPath` or `pathlib.WindowsPath`, just as `pathlib.Path`." ] }, { @@ -42,10 +48,17 @@ } }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/tmp/tmpdeaokyh7 \n" + ] + }, { "data": { "text/plain": [ - "PosixPath('/tmp')" + "PosixPath('/tmp/tmpdeaokyh7')" ] }, "execution_count": 2, @@ -54,11 +67,15 @@ } ], "source": [ - "local_path = UPath('/tmp')\n", + "tmp = NamedTemporaryFile()\n", + "print(tmp.name, type(tmp.name))\n", + "local_path = UPath(tmp.name)\n", + "assert isinstance(local_path, (pathlib.PosixPath, pathlib.WindowsPath))\n", "local_path" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "ein.tags": "worksheet-0", @@ -83,30 +100,32 @@ }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "upath/upath/registry.py:33: UserWarning: file filesystem path not explicitly implemented. falling back to default implementation. 
This filesystem may not be tested\n", - " warnings.warn(warning_str, UserWarning)\n" + "local_uri='file:///tmp/tmpdeaokyh7'\n", + "local_upath=UPath('file:/tmp/tmpdeaokyh7')\n", + "type(local_upath)=\n", + "type(local_upath.fs)=\n" ] - }, - { - "data": { - "text/plain": [ - "UPath('file:/tmp')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "local_upath = UPath('file:/tmp')\n", - "local_upath" + "local_uri = local_path.absolute().as_uri()\n", + "print(f\"{local_uri=}\")\n", + "\n", + "local_upath = UPath(local_uri)\n", + "print(f\"{local_upath=}\")\n", + "\n", + "print(f\"{type(local_upath)=}\")\n", + "assert isinstance(local_upath, UPath)\n", + "\n", + "print(f\"{type(local_upath.fs)=}\")\n", + "tmp.close()" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "ein.tags": "worksheet-0", @@ -115,11 +134,11 @@ } }, "source": [ - "### fsspec filesystems\n", + "# `fsspec` FileSystems\n", "\n", - "with `UPath` you can connect to any fsspec FileSystem and interact with it in with it as you would with your local filesystem using pathlib. Connection arguments can be given in a couple of ways:\n", + "With `UPath` you can connect to any `fsspec` FileSystem and interact with it in with it as you would with your local filesystem using `pathlib`. 
Connection arguments can be given in a couple of ways:\n", "\n", - "You can give them as keyword arguments as described for each filesystem in the fsspec docs:" + "You can give them as keyword arguments as described in the `fsspec` [docs](https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations) for each filesystem implementation:" ] }, { @@ -133,12 +152,26 @@ "slide_type": "-" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "ghpath = UPath('github:/', org='fsspec', repo='universal_pathlib', sha='main')" + "ghpath = UPath('github:/', org='fsspec', repo='universal_pathlib', sha='main')\n", + "assert ghpath.exists()\n", + "ghpath.fs" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "ein.tags": "worksheet-0", @@ -147,7 +180,7 @@ } }, "source": [ - "or define them in the path/url, in which case they will be appropriately parsed:" + "Or define them in the path/url, in which case they will be appropriately parsed:" ] }, { @@ -165,7 +198,7 @@ { "data": { "text/plain": [ - "GithubPath('github://fsspec:universal_pathlib@main/')" + "UPath('github://fsspec:universal_pathlib@main/')" ] }, "execution_count": 5, @@ -179,10 +212,11 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "With a `UPath` object instantiated, you can now interact with the paths with the usual `pathlib.Path` API" + "With a `UPath` object instantiated, you can now interact with the paths with the usual `pathlib.Path` API." 
] }, { @@ -201,30 +235,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "github://fsspec:universal_pathlib@main/.flake8\n", - "github://fsspec:universal_pathlib@main/.github\n", - "github://fsspec:universal_pathlib@main/.gitignore\n", - "github://fsspec:universal_pathlib@main/LICENSE\n", - "github://fsspec:universal_pathlib@main/README.md\n", - "github://fsspec:universal_pathlib@main/environment.yml\n", - "github://fsspec:universal_pathlib@main/notebooks\n", - "github://fsspec:universal_pathlib@main/noxfile.py\n", - "github://fsspec:universal_pathlib@main/pyproject.toml\n", - "github://fsspec:universal_pathlib@main/setup.py\n", - "github://fsspec:universal_pathlib@main/upath\n" + "github://fsspec:universal_pathlib@main/.flake8\n" ] } ], "source": [ "for p in ghpath.iterdir():\n", - " print(p)" + " print(p)\n", + " break" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "The `glob` method is also available for most filesystems. Note the syntax here is as defined in `fsspec`, rather than that of pathlib. 
" + "All the standard path methods and attributes of [`pathlib.Path`](https://docs.python.org/3/library/pathlib.html#pathlib.Path) are available too:" ] }, { @@ -240,44 +266,27 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "github://fsspec:universal_pathlib@main/noxfile.py\n", - "github://fsspec:universal_pathlib@main/setup.py\n", - "github://fsspec:universal_pathlib@main/upath/__init__.py\n", - "github://fsspec:universal_pathlib@main/upath/core.py\n", - "github://fsspec:universal_pathlib@main/upath/errors.py\n", - "github://fsspec:universal_pathlib@main/upath/implementations/__init__.py\n", - "github://fsspec:universal_pathlib@main/upath/implementations/cloud.py\n", - "github://fsspec:universal_pathlib@main/upath/implementations/hdfs.py\n", - "github://fsspec:universal_pathlib@main/upath/implementations/http.py\n", - "github://fsspec:universal_pathlib@main/upath/implementations/memory.py\n", - "github://fsspec:universal_pathlib@main/upath/registry.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/__init__.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/cases.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/conftest.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/implementations/__init__.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/implementations/test_gcs.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/implementations/test_hdfs.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/implementations/test_http.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/implementations/test_memory.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/implementations/test_s3.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/test_core.py\n", - "github://fsspec:universal_pathlib@main/upath/tests/utils.py\n" - ] + "data": { + "text/plain": [ + "UPath('github://fsspec:universal_pathlib@main/README.md')" + ] + }, + "execution_count": 7, 
+ "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "for p in ghpath.glob('**.py'):\n", - " print(p)" + "readme_path = ghpath / \"README.md\"\n", + "readme_path" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "All the standard path methods and attributes of `pathlib.Path` are available too:" + "To get the full path as a string use:" ] }, { @@ -295,7 +304,7 @@ { "data": { "text/plain": [ - "GithubPath('github://fsspec:universal_pathlib@main/README.md')" + "'github://fsspec:universal_pathlib@main/README.md'" ] }, "execution_count": 8, @@ -304,15 +313,15 @@ } ], "source": [ - "readme_path = ghpath / 'README.md'\n", - "readme_path" + "str(readme_path)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "To get the full path as a string use:" + "You can also use the path attribute to get just the path:" ] }, { @@ -330,7 +339,7 @@ { "data": { "text/plain": [ - "'github://fsspec:universal_pathlib@main/README.md'" + "'/README.md'" ] }, "execution_count": 9, @@ -339,14 +348,8 @@ } ], "source": [ - "str(readme_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also use the path attribute to get just the path:" + "# path attribute added\n", + "readme_path.path" ] }, { @@ -364,7 +367,7 @@ { "data": { "text/plain": [ - "'/README.md'" + "('README.md', 'README', '.md')" ] }, "execution_count": 10, @@ -373,8 +376,7 @@ } ], "source": [ - "# path attribute added\n", - "readme_path.path" + "readme_path.name, readme_path.stem, readme_path.suffix" ] }, { @@ -392,7 +394,7 @@ { "data": { "text/plain": [ - "'README.md'" + "'# Universal Pathlib'" ] }, "execution_count": 11, @@ -401,7 +403,7 @@ } ], "source": [ - "readme_path.name" + "readme_path.read_text().splitlines()[0]" ] }, { @@ -415,20 +417,9 @@ "slide_type": "-" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "'README'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": 
"execute_result" - } - ], + "outputs": [], "source": [ - "readme_path.stem" + "s3path = UPath(\"s3://spacenet-dataset\")" ] }, { @@ -444,31 +435,32 @@ }, "outputs": [ { - "data": { - "text/plain": [ - "'.md'" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "s3://spacenet-dataset/LICENSE.md\n" + ] } ], "source": [ - "readme_path.suffix" + "for p in s3path.iterdir():\n", + " if p.is_file():\n", + " print(p)\n", + " break" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can chain paths with the `/` operator and any methods." ] }, { "cell_type": "code", "execution_count": 14, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "outputs": [ { "data": { @@ -482,186 +474,152 @@ } ], "source": [ - "readme_path.exists()" + "(s3path / \"LICENSE.md\").exists()" ] }, { "cell_type": "code", "execution_count": 15, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'# Universal Pathlib'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "The \"SpaceNet Dataset\"\n" + ] } ], "source": [ - "readme_path.read_text()[:19]" + "with (s3path / \"LICENSE.md\").open(\"rt\", encoding=\"utf-8\") as f:\n", + " print(f.read(22))" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Some filesystems may require extra imports to use." + "The `glob` method is also available for most filesystems. 
Note the syntax here is as detailed in `fsspec` [docs](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.glob), rather than that of `pathlib`." ] }, { "cell_type": "code", "execution_count": 16, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "import s3fs" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "s3path = UPath(\"s3://spacenet-dataset\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "s3://spacenet-dataset/LICENSE.md\n", - "s3://spacenet-dataset/\n", - "s3://spacenet-dataset/AOIs\n", - "s3://spacenet-dataset/Hosted-Datasets\n", - "s3://spacenet-dataset/SpaceNet_Off-Nadir_Dataset\n", - "s3://spacenet-dataset/spacenet-model-weights\n", - "s3://spacenet-dataset/spacenet-stac\n", - "s3://spacenet-dataset/spacenet\n" + "s3://spacenet-dataset/AOIs/AOI_3_Paris/MS/16FEB29111913-M2AS_R01C1-055649178040_01_P001.TIF\n" ] } ], "source": [ - "for p in s3path.iterdir():\n", - " print(p)" + "for p in (s3path / \"AOIs\" / \"AOI_3_Paris\").glob(\"**.TIF\"):\n", + " print(p)\n", + " break" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "You can chain paths with the `/` operator and read text or binary contents." 
+ "### Works with fsspec filesystems\n", + "\n", + "Some filesystems may require additional packages to be installed.\n", + "\n", + "Check out some of the known implementations:" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { + "text/markdown": [ + "| Name | Class |\n", + "| --- | --- |\n", + "| abfs | adlfs.AzureBlobFileSystem |\n", + "| abfss | adlfs.AzureBlobFileSystem |\n", + "| adl | adlfs.AzureDatalakeFileSystem |\n", + "| arrow_hdfs | fsspec.implementations.arrow.HadoopFileSystem |\n", + "| asynclocal | morefs.asyn_local.AsyncLocalFileSystem |\n", + "| az | adlfs.AzureBlobFileSystem |\n", + "| blockcache | fsspec.implementations.cached.CachingFileSystem |\n", + "| cached | fsspec.implementations.cached.CachingFileSystem |\n", + "| dask | fsspec.implementations.dask.DaskWorkerFileSystem |\n", + "| dbfs | fsspec.implementations.dbfs.DatabricksFileSystem |\n", + "| dir | fsspec.implementations.dirfs.DirFileSystem |\n", + "| dropbox | dropboxdrivefs.DropboxDriveFileSystem |\n", + "| dvc | dvc.api.DVCFileSystem |\n", + "| file | fsspec.implementations.local.LocalFileSystem |\n", + "| filecache | fsspec.implementations.cached.WholeFileCacheFileSystem |\n", + "| ftp | fsspec.implementations.ftp.FTPFileSystem |\n", + "| gcs | gcsfs.GCSFileSystem |\n", + "| gdrive | gdrivefs.GoogleDriveFileSystem |\n", + "| generic | fsspec.generic.GenericFileSystem |\n", + "| git | fsspec.implementations.git.GitFileSystem |\n", + "| github | fsspec.implementations.github.GithubFileSystem |\n", + "| gs | gcsfs.GCSFileSystem |\n", + "| hdfs | fsspec.implementations.arrow.HadoopFileSystem |\n", + "| hf | huggingface_hub.HfFileSystem |\n", + "| http | fsspec.implementations.http.HTTPFileSystem |\n", + "| https | fsspec.implementations.http.HTTPFileSystem |\n", + "| jlab | fsspec.implementations.jupyter.JupyterFileSystem |\n", + "| jupyter | fsspec.implementations.jupyter.JupyterFileSystem |\n", + "| libarchive | 
fsspec.implementations.libarchive.LibArchiveFileSystem |\n", + "| memory | fsspec.implementations.memory.MemoryFileSystem |\n", + "| oci | ocifs.OCIFileSystem |\n", + "| oss | ossfs.OSSFileSystem |\n", + "| reference | fsspec.implementations.reference.ReferenceFileSystem |\n", + "| root | fsspec_xrootd.XRootDFileSystem |\n", + "| s3 | s3fs.S3FileSystem |\n", + "| s3a | s3fs.S3FileSystem |\n", + "| sftp | fsspec.implementations.sftp.SFTPFileSystem |\n", + "| simplecache | fsspec.implementations.cached.SimpleCacheFileSystem |\n", + "| smb | fsspec.implementations.smb.SMBFileSystem |\n", + "| ssh | fsspec.implementations.sftp.SFTPFileSystem |\n", + "| tar | fsspec.implementations.tar.TarFileSystem |\n", + "| wandb | wandbfs.WandbFS |\n", + "| webdav | webdav4.fsspec.WebdavFileSystem |\n", + "| webhdfs | fsspec.implementations.webhdfs.WebHDFS |\n", + "| zip | fsspec.implementations.zip.ZipFileSystem |" + ], "text/plain": [ - "'The \"SpaceNet Dataset\" is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. The \"SpaceNet Dataset\" includes all contents of this S3 bucket except for the contents of the \"Hosted-Datasets\" folder and its subfolders.\\n\\nhttps://creativecommons.org/licenses/by-sa/4.0/\\n'" + "" ] }, - "execution_count": 19, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "(s3path / \"LICENSE.md\").read_text()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The \"SpaceNet Dataset\"\n" - ] - } - ], - "source": [ - "with (s3path / \"LICENSE.md\").open(\"rt\", encoding=\"utf-8\") as f:\n", - " print(f.read(22))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Globbing also works for many filesystems." 
+ "from fsspec.registry import known_implementations\n", + "from IPython.display import Markdown, display\n", + "\n", + "known = [\n", + " f\"| {name} | {d['class']} |\" for name, d in sorted(known_implementations.items())\n", + "]\n", + "known = \"\\n\".join([\"| Name | Class |\\n| --- | --- |\", *known])\n", + "display(Markdown(known))" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "s3://spacenet-dataset/AOIs/AOI_3_Paris/MS/16FEB29111913-M2AS_R01C1-055649178040_01_P001.TIF\n", - "s3://spacenet-dataset/AOIs/AOI_3_Paris/MS/16FEB29111913-M2AS_R01C2-055649178040_01_P001.TIF\n", - "s3://spacenet-dataset/AOIs/AOI_3_Paris/MS/16FEB29111913-M2AS_R01C3-055649178040_01_P001.TIF\n", - "s3://spacenet-dataset/AOIs/AOI_3_Paris/MS/16FEB29111913-M2AS_R01C4-055649178040_01_P001.TIF\n", - "s3://spacenet-dataset/AOIs/AOI_3_Paris/MS/16FEB29111913-M2AS_R01C5-055649178040_01_P001.TIF\n" - ] - } - ], - "source": [ - "from itertools import islice \n", - "for p in islice((s3path / \"AOIs\" / \"AOI_3_Paris\").glob(\"**.TIF\"), 5):\n", - " print(p)" - ] + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "fsspec", "language": "python", "name": "python3" }, @@ -675,9 +633,14 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.10" + "version": "3.10.10" }, - "name": "Untitled.ipynb" + "name": "Untitled.ipynb", + "vscode": { + "interpreter": { + "hash": "d4d4510d3a243cfb62b62dec561eb2191aad85ef77736fec7cfe79076e15c84c" + } + } }, "nbformat": 4, "nbformat_minor": 4 diff --git a/upath/__init__.py b/upath/__init__.py index e2f85c2e..59d1006f 100644 --- a/upath/__init__.py +++ b/upath/__init__.py @@ -1,6 +1,7 @@ -"""Pathlib API extended to use fsspec backends""" +"""Pathlib API extended to use fsspec backends.""" __version__ = "0.0.23" from upath.core import 
UPath + __all__ = ["UPath"] diff --git a/upath/core.py b/upath/core.py index 500efcc5..a7c546e3 100644 --- a/upath/core.py +++ b/upath/core.py @@ -1,10 +1,11 @@ from __future__ import annotations -import pathlib import re import sys from os import PathLike from pathlib import _PosixFlavour # type: ignore +from pathlib import Path +from pathlib import PurePath from typing import Sequence from typing import TypeVar from typing import TYPE_CHECKING @@ -20,6 +21,7 @@ from typing import Any from typing import Generator from urllib.parse import SplitResult + from fsspec.spec import AbstractFileSystem __all__ = [ @@ -53,13 +55,13 @@ def listdir(self, path, **kwargs): p_fmt = self._format_path(path) contents = self._fs.listdir(p_fmt, **kwargs) if len(contents) == 0 and not self._fs.isdir(p_fmt): - raise NotADirectoryError + raise NotADirectoryError(str(self)) elif ( len(contents) == 1 and contents[0]["name"] == p_fmt and contents[0]["type"] == "file" ): - raise NotADirectoryError + raise NotADirectoryError(str(self)) return contents def glob(self, _path, path_pattern, **kwargs): @@ -89,6 +91,17 @@ def makedirs(self, path, exist_ok=False, **kwargs): def touch(self, path, **kwargs): return self._fs.touch(self._format_path(path), **kwargs) + def mv(self, path, target, recursive=False, maxdepth=None, **kwargs): + if hasattr(target, "_accessor"): + target = target._accessor._format_path(target) + return self._fs.mv( + self._format_path(path), + target, + recursive=recursive, + maxdepth=maxdepth, + **kwargs, + ) + class _UriFlavour(_PosixFlavour): def parse_parts(self, parts): @@ -118,7 +131,7 @@ def splitroot(self, part, sep="/"): PT = TypeVar("PT", bound="UPath") -class UPath(pathlib.Path): +class UPath(Path): __slots__ = ( "_url", "_kwargs", @@ -140,7 +153,7 @@ def __new__(cls: type[PT], *args: str | PathLike, **kwargs: Any) -> PT: args_list = list(args) other = args_list.pop(0) - if isinstance(other, pathlib.Path): + if isinstance(other, PurePath): # Create a (modified) 
copy, if first arg is a Path object _cls: type[Any] = type(other) drv, root, parts = _cls._parse_args(args_list) @@ -161,25 +174,23 @@ def __new__(cls: type[PT], *args: str | PathLike, **kwargs: Any) -> PT: **new_kwargs, ) - else: - url = stringify_path(other) - parsed_url = urlsplit(url) - for key in ["scheme", "netloc"]: - val = kwargs.get(key) - if val: - parsed_url = parsed_url._replace(**{key: val}) - - upath_cls = get_upath_class(protocol=parsed_url.scheme) - if upath_cls is None: - # treat as local filesystem, return PosixPath or WindowsPath - return pathlib.Path(*args, **kwargs) # type: ignore - - else: - # return upath instance - args_list.insert(0, parsed_url.path) - return upath_cls._from_parts( - args_list, url=parsed_url, **kwargs - ) + url = stringify_path(other) + parsed_url = urlsplit(url) + for key in ["scheme", "netloc"]: + val = kwargs.get(key) + if val: + parsed_url = parsed_url._replace(**{key: val}) + + upath_cls = get_upath_class(protocol=parsed_url.scheme) + if upath_cls is None: + # treat as local filesystem, return PosixPath or WindowsPath + return Path(*args, **kwargs) # type: ignore + + args_list.insert(0, parsed_url.path) + # return upath instance + return upath_cls._from_parts( # type: ignore + args_list, url=parsed_url, **kwargs + ) def __getattr__(self, item: str) -> Any: if item == "_accessor": @@ -320,11 +331,13 @@ def glob(self: PT, pattern: str) -> Generator[PT, None, None]: yield self._make_child(name) def rglob(self: PT, pattern: str) -> Generator[PT, None, None]: - path_pattern = self.joinpath("**", pattern) - for name in self._accessor.glob(self, path_pattern): - name = self._sub_path(name) - name = name.split(self._flavour.sep) - yield self._make_child(name) + path_pattern = self.joinpath(pattern) + r_path_pattern = self.joinpath("**", pattern) + for p in (path_pattern, r_path_pattern): + for name in self._accessor.glob(self, p): + name = self._sub_path(name) + name = name.split(self._flavour.sep) + yield 
self._make_child(name) def _sub_path(self, name): # only want the path name with iterdir @@ -372,9 +385,7 @@ def resolve(self: PT, strict: bool = False) -> PT: ) def exists(self) -> bool: - """ - Whether this path exists. - """ + """Check whether this path exists or not.""" if not getattr(self._accessor, "exists"): try: self._accessor.stat(self) @@ -432,25 +443,32 @@ def is_absolute(self) -> bool: def unlink(self, missing_ok: bool = False) -> None: if not self.exists(): if not missing_ok: - raise FileNotFoundError - else: - return + raise FileNotFoundError(str(self)) + return self._accessor.rm(self, recursive=False) def rmdir(self, recursive: bool = True) -> None: - """Add warning if directory not empty - assert is_dir? - """ if not self.is_dir(): - raise NotADirectoryError + raise NotADirectoryError(str(self)) + if not recursive and next(self.iterdir()): # type: ignore + raise OSError(f"Not recursive and directory not empty: {self}") self._accessor.rm(self, recursive=recursive) def chmod(self, mode, *, follow_symlinks: bool = True) -> None: raise NotImplementedError - def rename(self, target): - # can be implemented, but may be tricky - raise NotImplementedError + def rename(self, target, recursive=False, maxdepth=None, **kwargs): + """Move file, see `fsspec.AbstractFileSystem.mv`.""" + if not isinstance(target, UPath): + target = self.parent.joinpath(target).resolve() + self._accessor.mv( + self, + target, + recursive=recursive, + maxdepth=maxdepth, + **kwargs, + ) + return target def replace(self, target): raise NotImplementedError @@ -508,7 +526,7 @@ def mkdir( """ if parents: if not exist_ok and self.exists(): - raise FileExistsError + raise FileExistsError(str(self)) self._accessor.makedirs(self, exist_ok=exist_ok) else: try: @@ -519,7 +537,7 @@ def mkdir( ) except FileExistsError: if not exist_ok or not self.is_dir(): - raise + raise FileExistsError(str(self)) @classmethod def _from_parts( diff --git a/upath/errors.py b/upath/errors.py index 
ffceff16..e7c629a1 100644 --- a/upath/errors.py +++ b/upath/errors.py @@ -2,6 +2,7 @@ def __getattr__(name): + """Provide deprecation warning for NotDirectoryError.""" if name == "NotDirectoryError": warnings.warn( "upath.errors.NotDirectoryError is deprecated. " diff --git a/upath/implementations/cloud.py b/upath/implementations/cloud.py index 30a50697..d2f12bf7 100644 --- a/upath/implementations/cloud.py +++ b/upath/implementations/cloud.py @@ -1,6 +1,7 @@ -import upath.core import re +import upath.core + class _CloudAccessor(upath.core._FSSpecAccessor): def _format_path(self, path): @@ -10,12 +11,13 @@ def _format_path(self, path): return f"{path._url.netloc}/{path.path.lstrip('/')}" def mkdir(self, path, create_parents=True, **kwargs): + _path = self._format_path(path) if ( not create_parents and not kwargs.get("exist_ok", False) - and self._fs.exists(self._format_path(path)) + and self._fs.exists(_path) ): - raise FileExistsError + raise FileExistsError(_path) return super().mkdir(path, create_parents=create_parents, **kwargs) @@ -46,8 +48,12 @@ def _sub_path(self, name): relative path to `self`. 
""" sp = re.escape(self.path) - subed = re.sub(f"^({self._url.netloc})?/?({sp}|{sp[1:]})/?", "", name) - return subed + netloc = self._url.netloc + return re.sub( + f"^({netloc})?/?({sp}|{sp[1:]})/?", + "", + name, + ) def joinpath(self, *args): if self._url.netloc: diff --git a/upath/implementations/hdfs.py b/upath/implementations/hdfs.py index eeda435b..5a28573e 100644 --- a/upath/implementations/hdfs.py +++ b/upath/implementations/hdfs.py @@ -16,7 +16,7 @@ def mkdir(self, path, create_parents=True, **kwargs): return self._fs.makedirs(pth, **kwargs) else: if not kwargs.get("exist_ok", False) and self._fs.exists(pth): - raise FileExistsError + raise FileExistsError(pth) return self._fs.mkdir(pth, create_parents=create_parents, **kwargs) diff --git a/upath/registry.py b/upath/registry.py index 3e2cbb19..206db3e8 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -3,10 +3,12 @@ import importlib import warnings from functools import lru_cache +from pathlib import Path from typing import TYPE_CHECKING from fsspec.core import get_filesystem_class + if TYPE_CHECKING: from upath.core import PT @@ -46,8 +48,8 @@ def __getitem__(self, item: str) -> type[PT] | None: @lru_cache() -def get_upath_class(protocol: str) -> type[PT] | None: - """return the upath cls for the given protocol""" +def get_upath_class(protocol: str) -> type[PT] | type[Path] | None: + """Return the upath cls for the given protocol.""" cls: type[PT] | None = _registry[protocol] if cls is not None: return cls diff --git a/upath/tests/cases.py b/upath/tests/cases.py index 6bc461db..4b00ba70 100644 --- a/upath/tests/cases.py +++ b/upath/tests/cases.py @@ -209,10 +209,22 @@ def test_readlink(self): with pytest.raises(NotImplementedError): self.path.readlink() - @pytest.mark.xfail def test_rename(self): - # need to implement - raise False + upath = self.path.joinpath("file1.txt") + target = upath.parent.joinpath("file1_renamed.txt") + moved = upath.rename(target) + assert target == moved + assert not 
upath.exists() + assert moved.exists() + + def test_rename2(self): + upath = self.path.joinpath("folder1/file2.txt") + target = "file2_renamed.txt" + moved = upath.rename(target) + target_path = upath.parent.joinpath(target).resolve() + assert target_path == moved + assert not upath.exists() + assert moved.exists() def test_replace(self): pass @@ -220,8 +232,11 @@ def test_replace(self): def test_resolve(self): pass - def test_rglob(self): - pass + def test_rglob(self, pathlib_base): + pattern = "*.txt" + result = [*self.path.rglob(pattern)] + expected = [*pathlib_base.rglob(pattern)] + assert len(result) == len(expected) def test_samefile(self): pass @@ -386,3 +401,8 @@ def test_iterdir_no_dir(self): assert p.is_file() with pytest.raises(NotADirectoryError): _ = list(p.iterdir()) + + def test_rmdir_not_empty(self): + p = self.path.joinpath("folder1") + with pytest.raises(OSError, match="not empty"): + p.rmdir(recursive=False) diff --git a/upath/tests/implementations/test_azure.py b/upath/tests/implementations/test_azure.py index 4bcb99d2..1d3c8a47 100644 --- a/upath/tests/implementations/test_azure.py +++ b/upath/tests/implementations/test_azure.py @@ -41,3 +41,7 @@ def test_rmdir(self): @pytest.mark.skip def test_makedirs_exist_ok_false(self): pass + + @pytest.mark.xfail(reason="test interaction") + def test_rglob(self, pathlib_base): + return super().test_rglob(pathlib_base) diff --git a/upath/tests/implementations/test_http.py b/upath/tests/implementations/test_http.py index 7c55f802..6552ffcd 100644 --- a/upath/tests/implementations/test_http.py +++ b/upath/tests/implementations/test_http.py @@ -82,3 +82,11 @@ def test_resolve(self): # 301 redirect for `http://127.0.0.1:8080/folder` to # `http://127.0.0.1:8080/folder/` assert str(self.path.resolve()).endswith("/") + + def test_rename(self): + with pytest.raises(NotImplementedError): + return super().test_rename() + + def test_rename2(self): + with pytest.raises(NotImplementedError): + return 
super().test_rename2()  # delegate to the matching base test, not test_rename again