Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cluster Info #455

Open
wants to merge 10 commits into
base: dev
Choose a base branch
from
1 change: 1 addition & 0 deletions .github/workflows/gempyor-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ jobs:
shell: bash
- name: Run gempyor tests
run: |
export FLEPI_PATH=$(pwd)
cd flepimop/gempyor_pkg
pytest --exitfirst
shell: bash
Expand Down
37 changes: 37 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
push:
paths:
- 'flepimop/gempyor_pkg/**/*.py'
- 'info/**/*'
pull_request:
types:
- edited
Expand All @@ -14,8 +15,10 @@ on:
- synchronize
paths:
- '**/*.py'
- 'info/**/*'
branches:
- main
- dev

jobs:
black-for-python:
Expand Down Expand Up @@ -45,3 +48,37 @@ jobs:
with:
src: ${{ env.BLACK_SRC }}
options: "--line-length ${{ env.BLACK_LINE_LENGTH }} --extend-exclude '${{ env.BLACK_EXTEND_EXCLUDE }}' --check --verbose"
check-info-json-schema:
runs-on: ubuntu-latest
if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
steps:
- name: Checkout
uses: actions/checkout@v4
with:
lfs: true
sparse-checkout: |
info
sparse-checkout-cone-mode: false
- name: Install yq and check-jsonschema
run: |
sudo apt update
sudo apt install snapd
sudo snap install yq
sudo apt install pipx
pipx install check-jsonschema
- name: Convert YAML to JSON
run: |
cd info
for d in $( ls ); do
cd $d
for y in *.yml; do
yq --output-format json $y > ${y%.yml}.json
done
for j in *.json; do
if [[ "$j" != "schema.json" ]]; then
check-jsonschema --verbose --schemafile schema.json $j
fi
done
cd ..
done
cd ..
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ flepimop/gempyor_pkg/.coverage

# Environment variables
.env

# info/ directory
info/**/*.json
1 change: 1 addition & 0 deletions flepimop/gempyor_pkg/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = [
"numpy",
"pandas",
"pyarrow",
"pydantic",
"scipy",
"seaborn",
"sympy",
Expand Down
223 changes: 223 additions & 0 deletions flepimop/gempyor_pkg/src/gempyor/info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
"""
Retrieving static information from developer managed yaml files.

Currently, it includes utilities for handling cluster-specific information, but it can
be extended to other categories as needed.

Classes:
Module: Represents a software module with a name and optional version.
PathExport: Represents a path export with a path, prepend flag, and error handling.
Cluster: Represents a cluster with a name, list of modules, and list of path
exports.

Functions:
get_cluster_info: Retrieves cluster-specific information.

Notes:
By default the order for search paths is:

1) The current working directory, then
2) The directory specified by the `$FLEPI_INFO_PATH` environment variable if set,
and finally
3) The directory specified by the `$FLEPI_PATH` environment variable if set.

The functions in this module will search for an `info/` directory under the search
paths with a structure of `info/<category>/<name>.yml` where `<category>` is the
category of the information and `<name>` is the name of the information. The first
yaml file found will be used to populate the model.

The default search paths can be overridden by passing a list of paths to the
function being used via the `search_paths` argument.

Examples:
>>> from pprint import pprint
>>> from gempyor.info import get_cluster_info
>>> cluster_info = get_cluster_info("longleaf")
>>> cluster_info.name
'longleaf'
>>> pprint(cluster_info.modules)
[Module(name='gcc', version='9.1.0'),
Module(name='anaconda', version='2023.03'),
Module(name='git', version=None),
Module(name='aws', version=None)]
"""

__all__ = ["Cluster", "Module", "PathExport", "get_cluster_info"]


from collections.abc import Iterable
import os
from pathlib import Path
import re
from socket import getfqdn
from typing import Pattern, TypeVar

from pydantic import BaseModel
import yaml


class Module(BaseModel):
    """
    Configuration for a single environment module to load.

    Attributes:
        name: The module's name as known to the module system.
        version: An explicit version of the module to request, or `None` to accept
            whatever the module system provides by default.

    See Also:
        [Lmod](https://lmod.readthedocs.io/en/latest/)
    """

    # e.g. name="gcc", version="9.1.0"; version is omitted for unversioned loads.
    name: str
    version: str | None = None


class PathExport(BaseModel):
    """
    Configuration describing a directory to expose via `$PATH`.

    Attributes:
        path: The directory to add to the `$PATH` environment variable.
        prepend: Whether the directory should be placed in front of the existing
            `$PATH` entries rather than after them.
        error_if_missing: Whether a nonexistent `path` should be treated as an error
            instead of being silently tolerated.
    """

    path: Path
    prepend: bool = True
    error_if_missing: bool = False


class Cluster(BaseModel):
    """
    Top-level description of an HPC cluster's environment.

    Attributes:
        name: A short human readable identifier for the cluster.
        modules: The environment modules to load on this cluster.
        path_exports: The directories to expose via `$PATH` on this cluster.
    """

    # Pydantic deep-copies mutable defaults per instance, so the bare lists are safe.
    name: str
    modules: list[Module] = []
    path_exports: list[PathExport] = []


_BASE_MODEL_TYPE = TypeVar("T", bound=BaseModel)


_CLUSTER_FQDN_REGEXES: tuple[tuple[str, Pattern], ...] = (
("longleaf", re.compile(r"^longleaf\-login[0-9]+\.its\.unc\.edu$")),
("rockfish", re.compile(r"^login[0-9]+\.cm\.cluster$")),
)


def _get_info(
    category: str,
    name: str,
    model: type[_BASE_MODEL_TYPE],
    search_paths: Iterable[os.PathLike | str] | os.PathLike | str | None,
) -> _BASE_MODEL_TYPE:
    """
    Get and parse an information yaml file.

    This function is a light wrapper around reading and parsing yaml files located in
    `$FLEPI_PATH/info`.

    Args:
        category: The category of info to get, corresponds to a subdirectory in
            `$FLEPI_PATH/info`.
        name: The name of the info to get, corresponds to the name of a yaml file and
            is usually a human readable short name.
        model: The pydantic class to parse the info file with, determines the return
            type.
        search_paths: Either a path(s) like determining the directory to look for the
            info directory in or `None` to use the default search paths.

    Notes:
        The default search paths are:
        1) The current working directory, then
        2) The directory specified by the `$FLEPI_INFO_PATH` environment variable if
           set, and finally
        3) The directory specified by the `$FLEPI_PATH` environment variable if set.

    Returns:
        An instance of `model` with the contained info found and parsed.

    Raises:
        ValueError: If no `info/<category>/<name>.yml` file is found under any of the
            search paths.
    """
    if search_paths is None:
        # Default search order; unset environment variables are skipped.
        search_paths = [
            p
            for p in (Path.cwd(), os.getenv("FLEPI_INFO_PATH"), os.getenv("FLEPI_PATH"))
            if p is not None
        ]
    elif isinstance(search_paths, (os.PathLike, str)):
        # Normalize a single path-like argument to a one-element list.
        search_paths = [search_paths]
    search_paths = [Path(p).absolute() for p in search_paths]
    # First match wins. `Path.is_file()` returns `False` for nonexistent paths, so the
    # original's separate `exists()` check was redundant.
    info = next(
        (
            info
            for p in search_paths
            if (info := p / "info" / category / f"{name}.yml").is_file()
        ),
        None,
    )
    if info is None:
        raise ValueError(
            f"An {category}/{name}.yml file was not found in any of the following "
            f"directories: {', '.join(map(lambda p: str(p / 'info'), search_paths))}."
        )
    return model.model_validate(yaml.safe_load(info.read_text()))


def get_cluster_info(
    name: str | None,
    search_paths: Iterable[os.PathLike | str] | os.PathLike | str | None = None,
) -> Cluster:
    """
    Get cluster specific info.

    Args:
        name: The name of the cluster to pull information for. Currently only 'longleaf'
            and 'rockfish' are supported or `None` to infer from the FQDN.
        search_paths: Either a path(s) like determining the directory to look for the
            info directory in or `None` to use the default search paths.

    Returns:
        An object containing the information about the `name` cluster.

    Raises:
        ValueError: If `name` is `None` and the FQDN does not match a known cluster,
            or if no `cluster/<name>.yml` file is found in the search paths.

    Examples:
        >>> from gempyor.info import get_cluster_info
        >>> cluster_info = get_cluster_info("longleaf")
        >>> cluster_info.name
        'longleaf'
    """
    name = _infer_cluster_from_fqdn() if name is None else name
    return _get_info("cluster", name, Cluster, search_paths)


def _infer_cluster_from_fqdn(raise_error: bool = True) -> str | None:
    """
    Infer the cluster name from the FQDN.

    Args:
        raise_error: A flag indicating whether to raise an error if the FQDN does not
            match any of the expected regexes.

    Returns:
        The name of the cluster inferred from the FQDN, or `None` when nothing matches
        and `raise_error` is `False`.

    Raises:
        ValueError: If the value of `socket.getfqdn()` does not match an expected regex
            and `raise_error` is `True`.
    """
    fqdn = getfqdn()
    # Take the first cluster whose login-node regex matches this host's FQDN.
    inferred = next(
        (cluster for cluster, regex in _CLUSTER_FQDN_REGEXES if regex.match(fqdn)),
        None,
    )
    if inferred is None and raise_error:
        raise ValueError(
            f"The fqdn, '{fqdn}', does not match any of the expected clusters."
        )
    return inferred
78 changes: 78 additions & 0 deletions flepimop/gempyor_pkg/tests/info/test__get_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from pathlib import Path
from typing import Literal

from pydantic import BaseModel
import pytest
import yaml

from gempyor.info import _get_info


class NameOnly(BaseModel):
    """Minimal pydantic model with a single required `name` field, used as the
    `model` argument when exercising `_get_info` against the mock yaml files."""

    name: str


@pytest.fixture
def create_mock_info_directory(tmp_path: Path) -> Path:
    """Populate `tmp_path/info/abc/` with two small yaml files and return the
    absolute path of `tmp_path` (the directory `_get_info` should search from)."""
    mock_files = {
        "abc/def.yml": {"name": "Foobar"},
        "abc/ghi.yml": {"name": "Fizzbuzz"},
    }
    for relative_name, payload in mock_files.items():
        target = tmp_path / "info" / relative_name
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open(mode="w") as handle:
            yaml.dump(payload, handle)
    return tmp_path.absolute()


def test_file_does_not_exist_value_error(
    monkeypatch: pytest.MonkeyPatch, create_mock_info_directory: Path
) -> None:
    """Requesting a category/name with no yaml file raises `ValueError`."""
    # Point $FLEPI_PATH at the fixture's parent, which holds no `info/` tree, so the
    # lookup cannot succeed via the environment-variable search path.
    monkeypatch.setenv("FLEPI_PATH", str(create_mock_info_directory.parent))
    with pytest.raises(ValueError):
        _get_info("does_not", "exist", object, None)


@pytest.mark.parametrize(
    ("category", "name", "model"), (("abc", "def", NameOnly), ("abc", "ghi", NameOnly))
)
def test_output_validation_with_working_directory(
    monkeypatch: pytest.MonkeyPatch,
    create_mock_info_directory: Path,
    category: str,
    name: str,
    model: type[BaseModel],
) -> None:
    """`_get_info` resolves files via the current working directory search path."""
    # The cwd is the first default search path, so chdir into the mocked tree.
    monkeypatch.chdir(create_mock_info_directory)
    _output_validation_test(create_mock_info_directory, category, name, model)


@pytest.mark.parametrize(
    ("category", "name", "model"), (("abc", "def", NameOnly), ("abc", "ghi", NameOnly))
)
@pytest.mark.parametrize("envvar", ("FLEPI_INFO_PATH", "FLEPI_PATH"))
def test_output_validation_with_env_vars(
    monkeypatch: pytest.MonkeyPatch,
    create_mock_info_directory: Path,
    category: str,
    name: str,
    model: type[BaseModel],
    envvar: Literal["FLEPI_INFO_PATH", "FLEPI_PATH"],
) -> None:
    """`_get_info` resolves files via either supported environment variable."""
    monkeypatch.setenv(envvar, str(create_mock_info_directory))
    _output_validation_test(create_mock_info_directory, category, name, model)


def _output_validation_test(
    path: Path,
    category: str,
    name: str,
    model: type[BaseModel],
) -> None:
    """Assert `_get_info` yields equal, correctly-typed results whether given no
    search path, a `Path`, or a plain string path."""
    collected = []
    for candidate in (None, path, str(path)):
        parsed = _get_info(category, name, model, candidate)
        assert isinstance(parsed, model)
        collected.append(parsed)
    # Every variant of `search_paths` must produce an equivalent parsed model.
    for previous, current in zip(collected, collected[1:]):
        assert previous == current
Loading
Loading