Skip to content

Commit

Permalink
Small fixes for fsspec backend, Enable fsspec tests for s3fs and smb
Browse files Browse the repository at this point in the history
  • Loading branch information
painebot committed Feb 22, 2025
1 parent 04c28e2 commit 93308c2
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 71 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ jobs:

- name: Test
run: make coverage
if: matrix.os == 'ubuntu-latest'
if: matrix.os != 'windows-latest'

- name: Teardown Linux testing infra
run: make teardown-infra-ubuntu DOCKER_COMPOSE="docker compose"
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
DOCKER_COMPOSE := podman-compose
DOCKER_COMPOSE := docker compose

#########
# BUILD #
Expand Down Expand Up @@ -99,7 +99,7 @@ tests: test
.PHONY: setup-infra-ubuntu setup-infra-mac setup-infra-win setup-infra-common teardown-infra-ubuntu teardown-infra-mac teardown-infra-win teardown-infra-common dockerup dockerdown dockerlogs
setup-infra-ubuntu: dockerup

setup-infra-mac:
setup-infra-mac: dockerup
ci/mac/add_etc_hosts.sh
ci/mac/enable_sharing.sh

Expand Down
2 changes: 0 additions & 2 deletions js/src/treefinder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,6 @@ export namespace TreeFinderSidebar {
columns,
settings,
preferredDir,

rootPath = "",
caption = "TreeFinder",
id = "jupyterlab-tree-finder",
Expand Down Expand Up @@ -780,7 +779,6 @@ export namespace TreeFinderSidebar {
tooltip: "Refresh",
});


widget.toolbar.addItem("new file", new_file_button);
widget.toolbar.addItem("upload", uploader_button);
widget.toolbar.addItem("refresh", refresh_button);
Expand Down
101 changes: 73 additions & 28 deletions jupyterfs/manager/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import mimetypes
from base64 import decodebytes, encodebytes
from datetime import datetime
from pathlib import PurePosixPath

import nbformat
from jupyter_server.services.contents.filemanager import FileContentsManager
Expand Down Expand Up @@ -55,14 +56,55 @@ def create(*args, **kwargs):
def _checkpoints_class_default(self):
return NullCheckpoints

def _normalize_path(self, path):
if path and self._fs.root_marker and not path.startswith(self._fs.root_marker):
path = f"{self._fs.root_marker}{path}"
if path and not path.startswith(self.root):
path = f"{self.root}/{path}"
path = path or self.root

# Fix any differences due to root markers
path = path.replace("//", "/")

# TODO better carveouts
# S3 doesn't like spaces in paths
if self._fs.__class__.__name__.startswith("S3"):
path = path.replace(" ", "_")

return path

def _is_path_hidden(self, path):
"""Does the specific API style path correspond to a hidden node?
Args:
path (str): The path to check.
Returns:
hidden (bool): Whether the path is hidden.
"""
# We treat entries with leading . in the name as hidden (unix convention)
# We can (and should) check this even if the path does not exist
if PurePosixPath(path).name.startswith("."):
return True

# TODO PyFilesystem implementation does more, perhaps we should as well
return False

def is_hidden(self, path):
"""Does the API style path correspond to a hidden directory or file?
Args:
path (str): The path to check.
Returns:
hidden (bool): Whether the path exists and is hidden.
"""
return path.rsplit("/", 1)[-1].startswith(".")
path = self._normalize_path(path)
ppath = PurePosixPath(path)
# Path checks are quick, so we do it first to avoid unnecessary stat calls
if any(part.startswith(".") for part in ppath.parts):
return True
while ppath.parents:
if self._is_path_hidden(str(path)):
return True
ppath = ppath.parent
return False

def file_exists(self, path):
"""Returns True if the file exists, else returns False.
Expand All @@ -71,6 +113,7 @@ def file_exists(self, path):
Returns:
exists (bool): Whether the file exists.
"""
path = self._normalize_path(path)
return self._fs.isfile(path)

def dir_exists(self, path):
Expand All @@ -80,6 +123,7 @@ def dir_exists(self, path):
Returns:
exists (bool): Whether the path is indeed a directory.
"""
path = self._normalize_path(path)
return self._fs.isdir(path)

def exists(self, path):
Expand All @@ -89,12 +133,16 @@ def exists(self, path):
Returns:
exists (bool): Whether the target exists.
"""
path = self._normalize_path(path)
return self._fs.exists(path)

def _base_model(self, path):
"""Build the common base of a contents model"""

model = self._fs.info(path).copy()
try:
model = self._fs.info(path).copy()
except FileNotFoundError:
model = {"type": "file", "size": 0}
model["name"] = path.rstrip("/").rsplit("/", 1)[-1]
model["path"] = path.replace(self.root, "", 1)
model["last_modified"] = datetime.fromtimestamp(model["mtime"]).isoformat() if "mtime" in model else EPOCH_START
Expand All @@ -116,6 +164,13 @@ def _dir_model(self, path, content=True):
if content is requested, will include a listing of the directory
"""
model = self._base_model(path)

four_o_four = "directory does not exist: %r" % path

if not self.allow_hidden and self.is_hidden(path):
self.log.debug("Refusing to serve hidden directory %r, via 404 Error", path)
raise web.HTTPError(404, four_o_four)

if content:
files = self._fs.ls(path, detail=True, refresh=True)
model["content"] = [self._base_model(f["name"]) for f in files if self.allow_hidden or not self.is_hidden(f["name"])]
Expand All @@ -133,16 +188,8 @@ def _read_file(self, path, format):
"""
try:
bcontent = self._fs.cat(path)
except OSError:
# sometimes this causes errors:
# e.g. fir s3fs it causes
# OSError: [Errno 22] Unsupported header 'x-amz-checksum-mode' received for this API call.
# So try non-binary
try:
bcontent = self._fs.open(path, mode="r").read()
return bcontent, "text"
except OSError as e:
raise web.HTTPError(400, path, reason=str(e))
except OSError as e:
raise web.HTTPError(400, path, reason=str(e))

if format is None or format == "text":
# Try to interpret as unicode if format is unknown or if unicode
Expand Down Expand Up @@ -172,6 +219,7 @@ def _file_model(self, path, content=True, format=None):
If not specified, try to decode as UTF-8, and fall back to base64
"""
model = self._base_model(path)
model["type"] = "file"
model["mimetype"] = mimetypes.guess_type(path)[0]

if content:
Expand Down Expand Up @@ -216,14 +264,6 @@ def _notebook_model(self, path, content=True):
self.validate_notebook_model(model)
return model

def _normalize_path(self, path):
if path and self._fs.root_marker and not path.startswith(self._fs.root_marker):
path = f"{self._fs.root_marker}{path}"
if path and not path.startswith(self.root):
path = f"{self.root}/{path}"
path = path or self.root
return path

def get(self, path, content=True, type=None, format=None):
"""Takes a path for an entity and returns its model
Args:
Expand All @@ -250,8 +290,17 @@ def get(self, path, content=True, type=None, format=None):

def _save_directory(self, path, model):
"""create a directory"""
if not self._fs.exists(path):
self._fs.mkdir(path)
if not self.allow_hidden and self.is_hidden(path):
raise web.HTTPError(400, f"Cannot create directory {path!r}")

if not self.exists(path):
# TODO better carveouts
if self._fs.__class__.__name__.startswith("S3"):
# need to make a file temporarily
# use the convention of a hidden file
self._fs.touch(f"{path}/.s3fskeep")
else:
self._fs.mkdir(path)
elif not self._fs.isdir(path):
raise web.HTTPError(400, "Not a directory: %s" % (path))
else:
Expand All @@ -277,11 +326,7 @@ def _save_file(self, path, content, format):
bcontent = decodebytes(b64_bytes)
except Exception as e:
raise web.HTTPError(400, "Encoding error saving %s: %s" % (path, e))

if format == "text":
self._fs.pipe(path, bcontent)
else:
self._fs.pipe(path, bcontent)
self._fs.pipe(path, bcontent)

def save(self, model, path=""):
"""Save the file model and return the model with no content."""
Expand Down Expand Up @@ -333,7 +378,7 @@ def rename_file(self, old_path, new_path):
return

# Should we proceed with the move?
if self._fs.exists(new_path): # TODO and not samefile(old_os_path, new_os_path):
if self.exists(new_path): # TODO and not samefile(old_os_path, new_os_path):
raise web.HTTPError(409, "File already exists: %s" % new_path)

# Move the file
Expand Down
4 changes: 4 additions & 0 deletions jupyterfs/metamanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,4 +281,8 @@ async def post(self):
else:
resources = valid_resources

for resource in resources:
if not isinstance(resource, dict):
raise web.HTTPError(400, f"Resources must be a list of dicts, got: {resource}")

self.finish(json.dumps(self.contents_manager.initResource(*resources, options=options)))
91 changes: 53 additions & 38 deletions jupyterfs/tests/test_fsmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import shutil
import socket
from contextlib import nullcontext
from itertools import product
from pathlib import Path

import pytest
Expand All @@ -23,7 +24,8 @@

test_root_osfs = "osfs_local"

test_url_s3 = "http://127.0.0.1/"
# test_url_s3 = "http://127.0.0.1/"
test_url_s3 = f"http://{socket.gethostname().replace('.local', '')}"
test_port_s3 = "9000"

test_host_smb_docker_share = socket.gethostbyname(socket.gethostname())
Expand Down Expand Up @@ -66,17 +68,14 @@
class _TestBase:
"""Contains tests universal to all PyFilesystemContentsManager flavors"""

@pytest.fixture
def resource_uri(self):
raise NotImplementedError

@pytest.mark.parametrize("jp_server_config", configs)
async def test_write_read(self, jp_fetch, resource_uri, jp_server_config):
@pytest.mark.parametrize("jp_server_config,resource_type", list(product(configs, ["pyfs", "fsspec"])))
async def test_write_read(self, jp_fetch, jp_server_config, resource_type, tmp_path):
allow_hidden = jp_server_config["ContentsManager"]["allow_hidden"]

cc = ContentsClient(jp_fetch)

resources = await cc.set_resources([{"url": resource_uri}])
resource = self.get_resource(resource_type, tmp_path)
resources = await cc.set_resources([resource])
drive = resources[0]["drive"]

fpaths = [
Expand Down Expand Up @@ -131,9 +130,11 @@ def setup_method(self, method):
def teardown_method(self, method):
shutil.rmtree(self._test_dir, ignore_errors=True)

@pytest.fixture
def resource_uri(self, tmp_path):
yield f"osfs://{tmp_path}"
def get_resource(self, resource_type, tmp_path=None):
if resource_type == "pyfs":
return {"url": f"osfs://{tmp_path}", "type": resource_type}
elif resource_type == "fsspec":
return {"url": f"file://{tmp_path}", "type": resource_type}


class Test_FSManager_s3(_TestBase):
Expand All @@ -159,16 +160,28 @@ def setup_method(self, method):
def teardown_method(self, method):
self._rootDirUtil.delete()

@pytest.fixture
def resource_uri(self):
uri = "s3://{id}:{key}@{bucket}?endpoint_url={url}:{port}".format(
id=s3.aws_access_key_id,
key=s3.aws_secret_access_key,
bucket=test_dir,
url=test_url_s3.strip("/"),
port=test_port_s3,
)
yield uri
def get_resource(self, resource_type, tmp_path=None):
if resource_type == "pyfs":
uri = "s3://{id}:{key}@{bucket}?endpoint_url={url}:{port}".format(
id=s3.aws_access_key_id,
key=s3.aws_secret_access_key,
bucket=test_dir,
url=test_url_s3.strip("/"),
port=test_port_s3,
)
return {"url": uri, "type": resource_type}
elif resource_type == "fsspec":
uri = f"s3://{test_dir}"
return {
"url": uri,
"type": resource_type,
"kwargs": {
"key": s3.aws_access_key_id,
"secret": s3.aws_secret_access_key,
"endpoint_url": f"{test_url_s3}:{test_port_s3}",
"default_cache_type": "none",
},
}


@pytest.mark.darwin
Expand All @@ -195,12 +208,6 @@ class Test_FSManager_smb_docker_share(_TestBase):
smb_port=test_name_port_smb_docker_share,
)

# direct_tcp=False,
# host=None,
# hostname=None,
# my_name="local",
# name_port=137,
# smb_port=None,
@classmethod
def setup_class(cls):
# delete any existing root
Expand All @@ -217,17 +224,25 @@ def teardown_method(self, method):
# delete any existing root
self._rootDirUtil.delete()

@pytest.fixture
def resource_uri(self):
uri = "smb://{username}:{passwd}@{host}:{smb_port}/{share}?name-port={name_port}".format(
username=samba.smb_user,
passwd=samba.smb_passwd,
host=test_host_smb_docker_share,
share=test_hostname_smb_docker_share,
smb_port=test_name_port_smb_docker_share,
name_port=test_name_port_smb_docker_nameport,
)
yield uri
def get_resource(self, resource_type, tmp_path=None):
if resource_type == "pyfs":
uri = "smb://{username}:{passwd}@{host}:{smb_port}/{share}?name-port={name_port}".format(
username=samba.smb_user,
passwd=samba.smb_passwd,
host=test_host_smb_docker_share,
share=test_hostname_smb_docker_share,
smb_port=test_name_port_smb_docker_share,
name_port=test_name_port_smb_docker_nameport,
)
elif resource_type == "fsspec":
uri = "smb://{username}:{passwd}@{host}:{smb_port}/{share}".format(
username=samba.smb_user,
passwd=samba.smb_passwd,
host=test_host_smb_docker_share,
share=test_hostname_smb_docker_share,
smb_port=test_name_port_smb_docker_share,
)
return {"url": uri, "type": resource_type}


# @pytest.mark.darwin
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ develop = [
"fs.smbfs>=0.6.3",
# fsspec
"fsspec>=2023.6.0",
"s3fs>=2024",
"smbprotocol",
]
fs = [
"fs>=2.4.11",
Expand Down

0 comments on commit 93308c2

Please sign in to comment.