updating pcluster to 3.5.1 and to pydantic > 2
jerowe committed Sep 25, 2023
1 parent d03e0d3 commit a897357
Showing 6 changed files with 163 additions and 66 deletions.
26 changes: 15 additions & 11 deletions aws_pcluster_helpers/models/instance_types_data.py
@@ -9,24 +9,28 @@

from mypy_boto3_ec2.type_defs import InstanceTypeDef
from pydantic.dataclasses import dataclass
from pydantic import BaseModel, computed_field

from devtools import PrettyFormat, pprint, pformat, debug
import json
import os
import yaml

if not os.environ.get('AWS_DEFAULT_REGION'):
os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'


class Config:
arbitrary_types_allowed = True


@dataclass(config=Config)
class InstanceTypesMappings:
# @dataclass(config=Config)
class InstanceTypesMappings(BaseModel):
ec2_instance_types: Dict
sinfo_instance_types: Dict

@classmethod
def from_json(cls, json_file) -> InstanceTypesMappings:
def from_json(cls, json_file):
"""
Data looks like this:
"basic": {
@@ -58,8 +62,9 @@ def from_json(cls, json_file) -> InstanceTypesMappings:
else:
inverse_data[instance_type]["queues"].append(queue)
data[sinfo_instance_type]["queues"].append(queue)
return InstanceTypesMappings(
ec2_instance_types=inverse_data, sinfo_instance_types=data
return cls(
ec2_instance_types=inverse_data,
sinfo_instance_types=data
)


@@ -68,10 +73,10 @@ def size_in_gib(mib: int) -> int:
return mib_bytes / mib_bytes.IEC_prefixes["Gi"]


@dataclass(config=Config)
class InstanceTypesData:
class InstanceTypesData(BaseModel):
data: Dict

@computed_field
@property
def instance_type_data(self) -> InstanceTypeDef:
if "Hypervisor" in self.data.keys():
@@ -93,15 +98,14 @@ def from_yaml(cls, yaml_file: str) -> InstanceTypesData:
return InstanceTypesData(data=d)


@dataclass
class PClusterInstanceTypes:
class PClusterInstanceTypes(BaseModel):
instance_type_data: Dict[str, InstanceTypesData]

@classmethod
def from_json(cls, json_file: str) -> PClusterInstanceTypes:
def from_json(cls, json_file: str):
data = json.load(open(json_file))
instance_type_defs = {}
for k in data.keys():
instance_type_defs[k] = InstanceTypesData(data={"data": data[k]})

return PClusterInstanceTypes(instance_type_data=instance_type_defs)
return cls(instance_type_data=instance_type_defs)
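
The changes above are the standard pydantic v1-to-v2 moves: pydantic.dataclasses.dataclass(config=Config) becomes a plain BaseModel subclass, factory classmethods return cls(...) so subclasses construct themselves, and derived values gain @computed_field. A minimal standalone sketch of the same shape under pydantic 2.x (the class and field names here are illustrative, not the repository's):

# Sketch only: illustrates the v1 -> v2 pattern used in this file.
import json
from typing import Dict
from pydantic import BaseModel, ConfigDict

class Mappings(BaseModel):
    # v2 spelling of the v1 `class Config: arbitrary_types_allowed = True`
    model_config = ConfigDict(arbitrary_types_allowed=True)

    ec2_instance_types: Dict
    sinfo_instance_types: Dict

    @classmethod
    def from_json(cls, json_file: str) -> "Mappings":
        with open(json_file) as fh:
            data = json.load(fh)
        # `cls(...)` instead of the hard-coded class name keeps the
        # factory usable from subclasses, as the commit does above.
        return cls(ec2_instance_types=data, sinfo_instance_types=data)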
33 changes: 23 additions & 10 deletions aws_pcluster_helpers/models/nextflow.py
@@ -4,7 +4,18 @@
from aws_pcluster_helpers.models.sinfo import SInfoTable, SinfoRow
from jinja2 import Environment, BaseLoader
from pcluster import utils
from pydantic import validator
from typing import Any
from typing import List, Optional

import os
from pydantic import ValidationError, validate_call
from pydantic import BaseModel, computed_field
from pydantic import (
BaseModel,
FieldValidationInfo,
ValidationError,
field_validator,
)

PCLUSTER_VERSION = utils.get_installed_version()

@@ -26,25 +37,27 @@ class NXFProcess(SinfoRow):


class NXFSlurmConfig(SInfoTable):
processes: Optional[Dict[str, NXFProcess]]
default_processes: Optional[Dict[str, NXFProcess]]
# processes: Optional[Dict[str, NXFProcess]] = None
# default_processes: Optional[Dict[str, NXFProcess]] = None
include_memory: bool = False
scheduleable_memory = 0.95
scheduleable_memory: float = 0.95

@validator("processes", pre=True, always=True)
def set_processes(cls, v, values, **kwargs) -> Dict[str, NXFProcess]:
@computed_field
@property
def processes(self) -> Dict[str, NXFProcess]:
nxf_processes = {}
rows = values.get("rows", [])
rows = self.rows
for row in rows:
row_data = row.__dict__
label = row_data["label"]
nxf_processes[label] = NXFProcess(**row_data)
return nxf_processes

@validator("default_processes", pre=True, always=True)
def set_default_processes(cls, v, values, **kwargs) -> Dict[str, NXFProcess]:
@computed_field
@property
def default_processes(self) -> Dict[str, NXFProcess]:
# def set_default_processes(self) -> Dict[str, NXFProcess]:
processes = values.get("processes", [])
processes = self.processes
# processes = self.processes
default_processes = {
"tiny": dict(label="tiny", mem_min=1, mem=6, cpu=1, **defaults),
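
The recurring rewrite in this file: pydantic v1 @validator(..., pre=True, always=True) methods that synthesized a field from `values` become v2 @computed_field properties that read validated siblings off `self`. The diff also adds the explicit type annotation v2 requires on defaulted fields (scheduleable_memory: float = 0.95). A minimal before/after sketch under pydantic 2.x (model and field names are illustrative):

# Sketch only: derived fields, v1 validator style vs. v2 computed_field.
from typing import Dict
from pydantic import BaseModel, computed_field

class Table(BaseModel):
    rows: Dict[str, int] = {}

    # v1 equivalent (no longer supported in v2):
    #   @validator("processes", pre=True, always=True)
    #   def set_processes(cls, v, values, **kwargs):
    #       return {k: n * 2 for k, n in values.get("rows", {}).items()}

    @computed_field  # included in model_dump()/serialization in v2
    @property
    def processes(self) -> Dict[str, int]:
        # reads already-validated sibling fields directly from self
        return {k: n * 2 for k, n in self.rows.items()}

t = Table(rows={"tiny": 1})
assert t.processes == {"tiny": 2}
assert "processes" in t.model_dump()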
89 changes: 46 additions & 43 deletions aws_pcluster_helpers/models/sinfo.py
@@ -1,6 +1,16 @@
from typing import Any
from typing import List, Optional

import os
from pydantic import ValidationError, validate_call
from pydantic import BaseModel
from pydantic import (
BaseModel,
FieldValidationInfo,
ValidationError,
field_validator,
)

import pandas as pd
from aws_pcluster_helpers import (
PClusterConfig,
@@ -11,10 +21,11 @@
from aws_pcluster_helpers.models.config import PClusterConfigFiles
from aws_pcluster_helpers.utils.logging import setup_logger
from pcluster.config.cluster_config import SlurmClusterConfig
from pydantic import BaseModel
from pydantic import validator
from rich.table import Table
import os

from functools import cached_property

from pydantic import BaseModel, computed_field

if not os.environ.get('AWS_DEFAULT_REGION'):
os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'
@@ -23,20 +34,20 @@


class SinfoRow(BaseModel):
sinfo_name: Optional[str]
label: Optional[str]
queue: Optional[str]
constraint: Optional[str]
ec2_instance_type: Optional[str]
mem: Optional[int]
mem_mib: Optional[int]
cpu: Optional[int]
sinfo_name: Optional[str] = None
label: Optional[str] = None
queue: Optional[str] = None
constraint: Optional[str] = None
ec2_instance_type: Optional[str] = None
mem: Optional[int] = None
mem_mib: Optional[int] = None
cpu: Optional[int] = None
scheduleable_memory: Optional[float] = 0.95
scheduleable_memory_mib: Optional[int]
scheduleable_memory_gib: Optional[int]
vcpu: Optional[int]
gpus: Optional[List]
extra: Optional[dict]
scheduleable_memory_mib: Optional[float] = None
scheduleable_memory_gib: Optional[float] = None
vcpu: Optional[int] = None
gpus: Optional[List] = []
extra: Optional[dict] = None


# TODO add custom ami lookup
@@ -56,37 +67,28 @@ class Config:
{"label": "EC2", "key": "ec2_instance_type"},
]
pcluster_config_files: PClusterConfigFiles = PClusterConfigFiles()
# instance_type_mappings: Optional[InstanceTypesMappings]
pcluster_instance_types: Optional[PClusterInstanceTypes]
pcluster_config: Optional[SlurmClusterConfig]
rows: Optional[List[SinfoRow]]
dataframe: Optional[pd.DataFrame]

# @validator("instance_type_mappings", pre=True, always=True)
# def set_instance_type_mappings(cls, v, values, **kwargs):
# pcluster_config_files = values.get("pcluster_config_files")
# return InstanceTypesMappings.from_json(
# pcluster_config_files.instance_name_type_mappings_file
# )

@validator("pcluster_instance_types", pre=True, always=True)
def set_pcluster_instance_types(cls, v, values, **kwargs):
pcluster_config_files = values.get("pcluster_config_files")

@computed_field
@property
def pcluster_instance_types(self) -> PClusterInstanceTypes:
pcluster_config_files = self.pcluster_config_files
return PClusterInstanceTypes.from_json(
pcluster_config_files.instance_types_data_file
)

@validator("pcluster_config", pre=True, always=True)
def set_pcluster_config(cls, v, values, **kwargs):
pcluster_config_files = values.get("pcluster_config_files")
@computed_field
@property
def pcluster_config(self) -> PClusterConfig:
pcluster_config_files = self.pcluster_config_files
return PClusterConfig.from_yaml(pcluster_config_files.pcluster_config_file)

@validator("rows", pre=True, always=True)
def set_rows(cls, v, values, **kwargs) -> List[SinfoRow]:
pcluster_config_files = values.get("pcluster_config_files")
@computed_field
@property
def rows(self) -> List[SinfoRow]:
pcluster_config_files = self.pcluster_config_files
# instance_types_mappings = values.get("instance_type_mappings")
pcluster_instance_types = values.get("pcluster_instance_types")
pcluster_config = values.get("pcluster_config")
pcluster_instance_types = self.pcluster_instance_types
pcluster_config = self.pcluster_config

sinfo_records = []
for slurm_queue in pcluster_config.scheduling.queues:
@@ -154,10 +156,11 @@ def set_rows(cls, v, values, **kwargs) -> List[SinfoRow]:
)
return sinfo_records

@validator("dataframe", pre=True, always=True)
def set_dataframe(cls, v, values, **kwargs) -> pd.DataFrame:
@computed_field
@property
def dataframe(self) -> pd.DataFrame:
records = []
rows = values.get("rows")
rows = self.rows
for record in rows:
records.append(record.__dict__)
df = pd.DataFrame.from_records(records)
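
Most of the SinfoRow churn comes from one v2 behavior change: in v1, an Optional[str] annotation implied a default of None; in v2 it is a required field that merely accepts None, so every optional field now needs an explicit `= None` (or other default). A quick illustration, pydantic 2.x:

# Sketch only: Optional no longer implies a default in pydantic v2.
from typing import Optional
from pydantic import BaseModel, ValidationError

class Row(BaseModel):
    label: Optional[str]          # required in v2; may be None but must be passed
    queue: Optional[str] = None   # truly optional, as the commit rewrites it

Row(label=None)                   # ok: label supplied, queue defaults to None
try:
    Row()                         # accepted by v1, rejected by v2
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # -> "missing"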
5 changes: 3 additions & 2 deletions requirements.txt
@@ -1,12 +1,13 @@
devtools
colorlog
pydantic
pydantic==2.3.0
pydantic-core==2.6.3
boto3
boto3-stubs[essential]
datasize
humanize
rich-click
pandas
cookiecutter
aws-parallelcluster==3.3.1
aws-parallelcluster==3.5.1
typer
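
pydantic is now pinned together with its matching pydantic-core build. A quick interpreter check (not part of the commit) to confirm the resolved pair:

# Sketch only: verify the installed versions match the pins above.
import pydantic
import pydantic_core

print(pydantic.VERSION)           # expect 2.3.0
print(pydantic_core.__version__)  # expect 2.6.3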
1 change: 1 addition & 0 deletions requirements_dev.txt
@@ -1,3 +1,4 @@
black
watchdog
pytest
versioneer
75 changes: 75 additions & 0 deletions tests/models/test_sinfo.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python

"""Tests for `aws_pcluster_helpers` package."""

import unittest

from aws_pcluster_helpers import (
PClusterConfig,
PClusterConfigFiles,
InstanceTypesData,
PClusterInstanceTypes,
InstanceTypesMappings,
size_in_gib,
)
from aws_pcluster_helpers.utils.logging import setup_logger
from aws_pcluster_helpers.models.config import (
ENV_PCLUSTER_CONFIG_FILE,
ENV_INSTANCE_TYPES_DATA_FILE,
ENV_INSTANCE_TYPE_MAPPINGS_FILE,
)
from aws_pcluster_helpers.models.sinfo import SInfoTable, SinfoRow
import yaml
import json
import os
from devtools import PrettyFormat, pprint, pformat, debug
from rich.console import Console

from aws_pcluster_helpers.commands import cli_sinfo
from aws_pcluster_helpers.commands import cli_gen_nxf_slurm_config

instance_types_data_file = os.path.join(
os.path.dirname(__file__), "..", "instance-types-data.json"
)
instance_type_mapping_file = os.path.join(
os.path.dirname(__file__), "..", "instance_name_type_mappings.json"
)
pcluster_config_file = os.path.join(
os.path.dirname(__file__), "../", "pcluster_config.yml"
)
os.environ[ENV_INSTANCE_TYPE_MAPPINGS_FILE] = instance_type_mapping_file
os.environ[ENV_INSTANCE_TYPES_DATA_FILE] = instance_types_data_file
os.environ[ENV_PCLUSTER_CONFIG_FILE] = pcluster_config_file

logger = setup_logger(logger_name="tests", log_level="DEBUG")


def test_files():
assert os.path.exists(instance_type_mapping_file)
assert os.path.exists(pcluster_config_file)
assert os.path.exists(instance_types_data_file)


def test_sinfo():
sinfo = SInfoTable()
table = sinfo.get_table()
console = Console()
console.print(table)


def test_load_pcluster_config():
pcluster_config = PClusterConfig.from_yaml(pcluster_config_file)
assert pcluster_config


def test_load_instance_types_data():
pcluster_instance_types = PClusterInstanceTypes.from_json(instance_types_data_file)
# debug(pcluster_instance_types)
assert pcluster_instance_types


def test_load_instance_types_mapping():
instance_types_mappings = InstanceTypesMappings.from_json(
instance_type_mapping_file
)
assert instance_types_mappings
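
The new test module wires the fixture paths through environment variables before any model is constructed; a likely invocation, assuming those fixture files exist under tests/:

# Sketch only: run just this module (equivalent to
# `pytest tests/models/test_sinfo.py -v` from the repo root).
import pytest
raise SystemExit(pytest.main(["tests/models/test_sinfo.py", "-v"]))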
