diff --git a/.env.sample b/.env.sample index cca6cf9..909e568 100644 --- a/.env.sample +++ b/.env.sample @@ -6,6 +6,5 @@ AZURE_WORKSPACE= # Docker parameters DOCKER_REGISTRY= DOCKER_TAG= -## Gitlab tokens DOCKER_USERNAME= -DOCKER_PASSWORD= \ No newline at end of file +DOCKER_PASSWORD= diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index fedcded..4c85341 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -23,5 +23,5 @@ jobs: - uses: pdm-project/setup-pdm@v4.1 - name: Publish package distributions to PyPI - run: + run: pdm publish diff --git a/README.md b/README.md index ea86c80..4611b53 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ It aims to: Simply run ```bash -pip install ezazml +pip install ez-azml ``` # Quickstart The main entrypoint is the CLI command. The following command will show you the help @@ -25,12 +25,13 @@ The main entrypoint is the CLI command. The following command will show you the ez-azml -h ``` -You can run an example with +You can run an example with (if you have cloned the repo) ## Command ``` -ez-azml --config configs/pytorch/main.yaml run +ez-azml --config configs/command/main.yaml run ``` -## Pipeline +## PipelineRun ``` -ez-azml --config configs/pipeline/main.yaml run -``` \ No newline at end of file +PYTHONPATH=$PYTHONPATH:. ez-azml --config configs/pipeline/main.yaml run +``` +(`PYTHONPATH` to point to the examples dir. This is not needed if your pipeline is available in any package as an importable function) diff --git a/configs/pytorch/main.yaml b/configs/command/main.yaml similarity index 84% rename from configs/pytorch/main.yaml rename to configs/command/main.yaml index 0b0a51d..b9aaea9 100644 --- a/configs/pytorch/main.yaml +++ b/configs/command/main.yaml @@ -1,7 +1,7 @@ cloud_run: - class_path: ez_azml.cloud_runs.DockerCommand + class_path: ez_azml.cloud_runs.DockerCommandRun init_args: - docker: docker.yaml + docker: ../dockers/pytorch.yaml code: examples compute: ../clusters/cpu.yaml inputs: @@ -20,7 +20,7 @@ cloud_run: mode: rw_mount commands: [ echo 'This is an example', - python pytorch.py + python pytorch_script.py ] flags: [ "--learning_rate 1e-6", diff --git a/configs/pytorch/docker.yaml b/configs/dockers/pytorch.yaml similarity index 100% rename from configs/pytorch/docker.yaml rename to configs/dockers/pytorch.yaml diff --git a/configs/environments/conda.yaml b/configs/environments/conda.yaml index a9de33d..55c1e62 100644 --- a/configs/environments/conda.yaml +++ b/configs/environments/conda.yaml @@ -2,10 +2,5 @@ name: default_environment channels: - defaults dependencies: - - python=3.8.12 + - python=3.9.12 - pip=21.2.2 - - pip: - - mldesigner==0.1.0b17 - - azure-ai-ml==1.18.0 - - azureml-mlflow==1.56.0 - - ez-azml==0.1.0 diff --git a/configs/pipeline/main.yaml b/configs/pipeline/main.yaml index 1476989..1adb27f 100644 --- a/configs/pipeline/main.yaml +++ b/configs/pipeline/main.yaml @@ -1,12 +1,59 @@ cloud_run: - class_path: ez_azml.cloud_runs.Pipeline + class_path: ez_azml.cloud_runs.PipelineRun init_args: experiment_name: "pipelines example" commands: - - function: examples.pipelines.test_fn - environment: ../environments/pipeline.yaml + torch: + class_path: ez_azml.cloud_runs.DockerCommandRun + init_args: + docker: ../dockers/pytorch.yaml + code: examples/pytorch_script.py + compute: ../clusters/cpu.yaml + inputs: + data_path: + class_path: azure.ai.ml.Input + init_args: + type: uri_folder + path: azureml://datastores/workspaceblobstore/paths/data + mode: ro_mount + outputs: + output_path: + class_path: azure.ai.ml.Output + init_args: + type: uri_folder + path: azureml://datastores/workspaceblobstore/paths/outputs + mode: rw_mount + commands: [ + echo 'This is an example', + python pytorch_script.py + ] + flags: [ + "--learning_rate 1e-6", + "--output_path ${{outputs.output_path}}" + #"The input is ignored, could be used as ${{inputs.data_path}} through flags" + ] + register_kwargs: + version: 2 + command: + class_path: ez_azml.cloud_runs.CommandRun + init_args: + name: print_output + compute: ../clusters/cpu.yaml + code: examples/print_output.py + environment: ../environments/pipeline.yaml + inputs: + prev_output: + class_path: azure.ai.ml.Input + init_args: + type: uri_folder + commands: [ + echo 'This is an print example', + "python print_output.py --path ${{inputs.prev_output}}" + ] + register_kwargs: + version: 2 - pipeline: examples.pipelines.test_pipeline + pipeline: examples.pipeline.ez_azml_pipeline compute: ../clusters/cpu_raw.yaml inputs: test_input: diff --git a/examples/pipeline.py b/examples/pipeline.py new file mode 100644 index 0000000..fd2baeb --- /dev/null +++ b/examples/pipeline.py @@ -0,0 +1,6 @@ +def ez_azml_pipeline(test_input): + """Example pipeline function.""" + # pytorch_script is the name of the registered component + test_result = pytorch_script(data_path=test_input) # type: ignore # noqa F821 + print_output(prev_output=test_result.outputs.output_path) # type: ignore # noqa F821 + return {"test_output": test_result.outputs.output_path} diff --git a/examples/pipelines.py b/examples/pipelines.py deleted file mode 100644 index 18dc72e..0000000 --- a/examples/pipelines.py +++ /dev/null @@ -1,16 +0,0 @@ -def test_fn( - test_input: "Input(type='uri_file')", # type: ignore # noqa: F821 - test_output: "Output(type='uri_folder')", # type: ignore # noqa: F821 -): - """Test input function.""" - out_path = f"{test_output}/out_file.txt" - with open(str(input)) as f_in, open(out_path, "w") as f_out: - content = f_in.read() - print(content) - f_out.write(content) - - -def test_pipeline(test_input): - """Test pipeline function.""" - test_result = test_fn(test_input=test_input) - return {"test_output": test_result.outputs.test_output} diff --git a/examples/print_output.py b/examples/print_output.py new file mode 100644 index 0000000..f911e0b --- /dev/null +++ b/examples/print_output.py @@ -0,0 +1,11 @@ +import argparse +from pathlib import Path + +# Argument parsing +parser = argparse.ArgumentParser(description="Print input") +parser.add_argument("--path", type=Path, default="outputs", help="output to read") +args = parser.parse_args() +path: Path = args.path +for file in path.glob("*.txt"): + content = (path / file).read_text() + print(content) diff --git a/examples/pytorch.py b/examples/pytorch_script.py similarity index 100% rename from examples/pytorch.py rename to examples/pytorch_script.py diff --git a/src/ez_azml/cli/ez_azml.py b/src/ez_azml/cli/ez_azml.py index 30bf491..dc2d97b 100644 --- a/src/ez_azml/cli/ez_azml.py +++ b/src/ez_azml/cli/ez_azml.py @@ -43,3 +43,10 @@ def run(self): output = self.cloud_run.run() logger.info(f"Run available at {output.url}") self.cloud_run.on_run_end(output) + + def register(self): + """Registers the cloud run as a reusable component.""" + self.cloud_run.on_register_start() + self.cloud_run.register() + self.cloud_run.on_register_end() + logger.info("Cloud run registered!") diff --git a/src/ez_azml/cloud_runs/__init__.py b/src/ez_azml/cloud_runs/__init__.py index faad41b..1fe11e4 100644 --- a/src/ez_azml/cloud_runs/__init__.py +++ b/src/ez_azml/cloud_runs/__init__.py @@ -1,4 +1,4 @@ -from .commands import Command, DockerCommand -from .pipelines import Pipeline +from .commands import CommandRun, DockerCommandRun +from .pipelines import PipelineRun -__all__ = ["Command", "DockerCommand", "Pipeline"] +__all__ = ["CommandRun", "DockerCommandRun", "PipelineRun"] diff --git a/src/ez_azml/cloud_runs/cloud_run.py b/src/ez_azml/cloud_runs/cloud_run.py index caa759a..c2d08a3 100644 --- a/src/ez_azml/cloud_runs/cloud_run.py +++ b/src/ez_azml/cloud_runs/cloud_run.py @@ -47,3 +47,15 @@ def run(self) -> RunOutput: def on_run_end(self, output: RunOutput): """Hook called once run has been submitted.""" return + + def on_register_start(self): + """Hook called before a component has been registered.""" + return + + @abstractmethod + def register(self): + """Registers the run as a reusable component.""" + + def on_register_end(self): + """Hook called after a component has been registered.""" + return diff --git a/src/ez_azml/cloud_runs/commands/__init__.py b/src/ez_azml/cloud_runs/commands/__init__.py index 2fd3604..a8a7bed 100644 --- a/src/ez_azml/cloud_runs/commands/__init__.py +++ b/src/ez_azml/cloud_runs/commands/__init__.py @@ -1,4 +1,4 @@ -from .command import Command -from .docker_command import DockerCommand +from .command import CommandRun +from .docker_command import DockerCommandRun -__all__ = ["DockerCommand", "Command"] +__all__ = ["DockerCommandRun", "CommandRun"] diff --git a/src/ez_azml/cloud_runs/commands/command.py b/src/ez_azml/cloud_runs/commands/command.py index 1f6dd36..da49322 100644 --- a/src/ez_azml/cloud_runs/commands/command.py +++ b/src/ez_azml/cloud_runs/commands/command.py @@ -1,54 +1,117 @@ -from typing import Optional +import io +from pathlib import Path +from typing import Any, Optional, Union -from azure.ai.ml import command -from azure.ai.ml.entities import ( - UserIdentityConfiguration, - WorkspaceConnection, -) +import yaml +from azure.ai.ml import Input, MLClient, Output, command, load_component +from azure.ai.ml.entities import Command, UserIdentityConfiguration, WorkspaceConnection from typing_extensions import override from ez_azml.cloud_runs.cloud_run import CloudRun, RunOutput -class Command(CloudRun): +class CommandRun(CloudRun): """Cloud run that is based on AzureML Commands. Args: - code: location of the scripts to use + code: location of the python scripts to use commands: commands to run on the cloud (e.g. `python my_script.py`) flags: flags to use with the last command. identity: credentials to use. + name: command's name. + register_kwargs: kwargs to use when registering component. """ def __init__( self, - code: str, - commands: list[str], + code: Union[str, Path], + commands: Union[str, list[str]], ws_connection: Optional[WorkspaceConnection] = None, flags: Optional[list[str]] = None, identity: Optional[UserIdentityConfiguration] = None, + name: Optional[str] = None, + register_kwargs: Optional[dict[str, Any]] = None, **kwargs, ) -> None: super().__init__(**kwargs) self.ws_connection = ws_connection - identity = identity or UserIdentityConfiguration() + self.identity = identity or UserIdentityConfiguration() + if isinstance(commands, str): + commands = [commands] if flags: commands[-1] += " " + " ".join(flags) - self.job = command( - command=";".join(commands), - code=code, + self.commands = commands + self.code = Path(code) + self.name = name or self.code.stem + self.register_kwargs = register_kwargs or {} + + @property + def cli_command(self) -> str: + """Actual cli command run on AzureML.""" + return ";".join(self.commands) + + @property + def command(self) -> Command: + """Runnable command.""" + return command( + command=self.cli_command, + code=self.code, environment=self.environment, compute=self.compute.name, inputs=self.inputs, outputs=self.outputs, - identity=identity, + identity=self.identity, ) + def _get_io_dict( + self, ios: dict[str, Union[Input, Output]], keys: Optional[list[str]] = None + ): + ios_as_dict = {} + keys = keys or ["type"] + for key, io_obj in ios.items(): + io_d = dict(io_obj) + ios_as_dict[key] = {k: io_d[k] for k in keys if io_d[k] is not None} + return ios_as_dict + + def _get_component_yaml_stream( + self, name: Optional[str] = None, environment: Optional[str] = None, **kwargs + ) -> io.StringIO: + inputs_dict = self._get_io_dict(self.inputs) + outputs_dict = self._get_io_dict(self.outputs) + if not environment: + self.ml_client.environments.create_or_update(self.environment) + version = self.environment.version or 1 + environment = f"azureml:{self.environment.name}:{version}" + yaml_dict = { + "name": name or self.name, + "inputs": inputs_dict, + "outputs": outputs_dict, + "code": str(self.code), + "command": self.cli_command, + "environment": environment, + **kwargs, + } + yaml_stream = io.StringIO() + yaml.dump(yaml_dict, yaml_stream) + yaml_stream.seek(0) # Move the file pointer to the beginning + return yaml_stream + + def get_component(self, **kwargs): + """Returns the mldesigner component.""" + yaml_file = self._get_component_yaml_stream(**self.register_kwargs, **kwargs) + return load_component(yaml_file) + + @override + def register(self, ml_client: Optional[MLClient] = None, **kwargs): + ml_client = ml_client or self.ml_client + component = self.get_component(**kwargs) + ml_client.components.create_or_update(component) + return component + @override def run(self) -> str: if self.ws_connection: self.ml_client.connections.create_or_update(self.ws_connection) - self.ml_client.environments.create_or_update(self.environment) self.ml_client.begin_create_or_update(self.compute).result() - cloud_job = self.ml_client.create_or_update(self.job) + cloud_job = self.ml_client.create_or_update(self.command) return RunOutput(url=cloud_job.studio_url) diff --git a/src/ez_azml/cloud_runs/commands/docker_command.py b/src/ez_azml/cloud_runs/commands/docker_command.py index f11885e..abdecec 100644 --- a/src/ez_azml/cloud_runs/commands/docker_command.py +++ b/src/ez_azml/cloud_runs/commands/docker_command.py @@ -1,10 +1,10 @@ from ez_azml.entities import DockerEnvironment, DockerWorkspaceConnection from ez_azml.params import DockerParams -from .command import Command +from .command import CommandRun -class DockerCommand(Command): +class DockerCommandRun(CommandRun): """A Command that uses a docker image. Args: diff --git a/src/ez_azml/cloud_runs/pipelines/__init__.py b/src/ez_azml/cloud_runs/pipelines/__init__.py index 2c6c5bb..be769de 100644 --- a/src/ez_azml/cloud_runs/pipelines/__init__.py +++ b/src/ez_azml/cloud_runs/pipelines/__init__.py @@ -1,3 +1,3 @@ -from .pipeline import Pipeline, PipelineCommand +from .pipeline import PipelineRun -__all__ = ["Pipeline", "PipelineCommand"] +__all__ = ["PipelineRun"] diff --git a/src/ez_azml/cloud_runs/pipelines/pipeline.py b/src/ez_azml/cloud_runs/pipelines/pipeline.py index b412376..934ca0e 100644 --- a/src/ez_azml/cloud_runs/pipelines/pipeline.py +++ b/src/ez_azml/cloud_runs/pipelines/pipeline.py @@ -1,39 +1,14 @@ -import re -from dataclasses import dataclass, field from typing import Any, Callable, Optional -import mldesigner as mld from azure.ai.ml.dsl import pipeline as pipeline_dec -from azure.ai.ml.entities import Environment +from azure.ai.ml.entities import Component, PipelineJob from typing_extensions import override from ez_azml.cloud_runs.cloud_run import CloudRun, RunOutput +from ez_azml.cloud_runs.commands import CommandRun -@dataclass -class PipelineCommand: - """Class representing a mldesigner Azureml step (mldesigner.command_component). - - Args: - function: function to be decorated - environment: enviroment to run the comand at - component_kwargs: kwargs to pass to the `mldesigner.command_component` decorator - """ - - function: Callable - environment: Optional[Environment] = None - component_kwargs: Optional[dict[str, Any]] = field(default_factory=dict) - - def __post_init__(self): - self.component_kwargs["environment"] = self.environment - - @property - def name(self): - """Command's name.""" - return self.function.__name__ - - -class Pipeline(CloudRun): +class PipelineRun(CloudRun): """Cloud run that uses mldesigner pipelines. Args: @@ -46,43 +21,44 @@ class Pipeline(CloudRun): def __init__( self, experiment_name: str, - commands: list[PipelineCommand], + commands: dict[str, CommandRun], pipeline: Callable, dec_kwargs: Optional[dict[str, Any]] = None, + register_components: bool = False, **kwargs, ) -> None: super().__init__(**kwargs) self.experiment_name = experiment_name + if isinstance(commands, CommandRun): + commands = [commands] + if isinstance(commands, list): + commands = {command.name: command for command in commands} self.commands = commands self.dec_kwargs = dec_kwargs or {} self.pipeline = pipeline - - def _register_components( - self, commands: list[PipelineCommand] - ) -> list[PipelineCommand]: - kwargs_pattern = r"(\w+)\s*=\s*(['\"]?)(\w+)\2" - for command in commands: - fn = command.function - for parameter in fn.__annotations__: - hint = fn.__annotations__[parameter] - if isinstance(hint, str): - potential_kwargs = { - key: value for key, _, value in re.findall(kwargs_pattern, hint) - } - if "Input(" in hint: - hint = mld.Input(**potential_kwargs) - elif "Output(" in hint: - hint = mld.Output(**potential_kwargs) - fn.__annotations__[parameter] = hint - command.function = mld.command_component(**command.component_kwargs)(fn) - return commands - - def _build_pipeline(self, pipeline: Callable, dec_kwargs: Optional[dict[str, Any]]): - # Inject the decorated functions - for f in self.commands: - pipeline.__globals__[f.__name__] = f - f = pipeline_dec(**dec_kwargs)(pipeline) - return pipeline_dec(**dec_kwargs)(pipeline) + self.register_components = register_components + + def _get_command_components( + self, commands: dict[str, CommandRun] + ) -> list[Component]: + return [ + command.register(self.ml_client) + if self.register_components + else command.get_component() + for command in commands.values() + ] + + def _build_pipeline( + self, + pipeline: Callable, + dec_kwargs: Optional[dict[str, Any]], + components: list[Component], + ): + # Inject the components + for component in components: + pipeline.__globals__[component.name] = component + dec_pipeline = pipeline_dec(**dec_kwargs)(pipeline) + return dec_pipeline def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): if self.compute: @@ -90,21 +66,27 @@ def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): "default_compute", self.compute.name ) if self.environment: - dec_kwargs["default_compute"] = dec_kwargs.get( - "default_compute", self.compute.name - ) + # TODO: check correct key + dec_kwargs["environment"] = dec_kwargs.get("environment", self.environment) return dec_kwargs @override - def on_run_start(self): - self.commands = self._register_components(self.commands) + def register(self) -> PipelineJob: + for command in self.commands.values(): + command.ml_client = command.ml_client or self.ml_client + self.components = self._get_command_components(self.commands) self.dec_kwargs = self._setup_dec_kwargs(self.dec_kwargs) - self.pipeline = self._build_pipeline(self.pipeline, self.dec_kwargs) + self.pipeline = self._build_pipeline( + self.pipeline, self.dec_kwargs, self.components + ) + self.pipeline_component = self.pipeline(**self.inputs, **self.outputs) + # TODO: add tenacity + self.ml_client.jobs.create_or_update( + self.pipeline_component, experiment_name=self.experiment_name + ) + return self.pipeline_component @override def run(self) -> RunOutput: - pipeline_job = self.pipeline(**self.inputs, **self.outputs) - pipe = self.ml_client.jobs.create_or_update( - pipeline_job, experiment_name=self.experiment_name - ) + pipe = self.register() return RunOutput(url=pipe.studio_url) diff --git a/src/ez_azml/env.py b/src/ez_azml/env.py index dc398f9..cc92aa5 100644 --- a/src/ez_azml/env.py +++ b/src/ez_azml/env.py @@ -9,6 +9,10 @@ DOCKER_TAG = "DOCKER_TAG" DOCKER_IMAGE = "DOCKER_IMAGE" +AZURE_SUBSCRIPTION_ID = "AZURE_SUBSCRIPTION_ID" +AZURE_RESOURCE_GROUP_NAME = "AZURE_RESOURCE_GROUP_NAME" +AZURE_WORKSPACE = "AZURE_WORKSPACE" + def os_field(env_var: str, default: Optional[str] = None) -> Optional[str]: """Dataclass field that retrieves an environment variable. diff --git a/src/ez_azml/ml_client.py b/src/ez_azml/ml_client.py index b6aeffb..271520e 100644 --- a/src/ez_azml/ml_client.py +++ b/src/ez_azml/ml_client.py @@ -6,6 +6,12 @@ from azure.core.credentials import TokenCredential from azure.identity import DefaultAzureCredential +from ez_azml.env import ( + AZURE_RESOURCE_GROUP_NAME, + AZURE_SUBSCRIPTION_ID, + AZURE_WORKSPACE, +) + class EzMLClient(MLClient): """A easy client class to interact with Azure ML services. @@ -30,11 +36,11 @@ def __init__( **kwargs: Any, ) -> None: credential = credential or DefaultAzureCredential() - subscription_id = subscription_id or os.getenv("AZURE_SUBSCRIPTION_ID") + subscription_id = subscription_id or os.getenv(AZURE_SUBSCRIPTION_ID) resource_group_name = resource_group_name or os.getenv( - "AZURE_RESOURCE_GROUP_NAME" + AZURE_RESOURCE_GROUP_NAME ) - workspace_name = workspace_name or os.getenv("AZURE_WORKSPACE") + workspace_name = workspace_name or os.getenv(AZURE_WORKSPACE) super().__init__( credential, subscription_id, resource_group_name, workspace_name, **kwargs ) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py new file mode 100644 index 0000000..0252929 --- /dev/null +++ b/tests/cli/test_cli.py @@ -0,0 +1,25 @@ +from unittest import mock + +import pytest +from ez_azml.cli import EzAzureMLCLI + + +@pytest.mark.parametrize("command", ["run", "register"]) +def test_cloud_run_hooks_call(command: str): + """Tests that cli is calling pre and post hooks.""" + mock_cloud_run = mock.MagicMock() + cli = EzAzureMLCLI(cloud_run=mock_cloud_run) + + getattr(cli, command)() + + expected = [f"on_{command}_start", command, f"on_{command}_end"] + for method in expected: + getattr(mock_cloud_run, method).assert_called_once() + + +def test_ml_client_set(): + """Tests that ml_client is injected.""" + mock_cloud_run = mock.MagicMock() + mock_cloud_run.ml_client = None + cli = EzAzureMLCLI(cloud_run=mock_cloud_run) + assert cli.ml_client == mock_cloud_run.ml_client diff --git a/tests/cloud_runs/commands/test_commands.py b/tests/cloud_runs/commands/test_commands.py new file mode 100644 index 0000000..93b4239 --- /dev/null +++ b/tests/cloud_runs/commands/test_commands.py @@ -0,0 +1,25 @@ +from pathlib import Path + +import yaml +from azure.ai.ml.entities import CommandComponent +from ez_azml.cloud_runs import CommandRun + + +def test_get_component_yaml(command: CommandRun, command_code_path: Path): + """Tests that the yaml is properly generated.""" + yaml_io = command._get_component_yaml_stream(**command.register_kwargs) + expected_yaml = { + "code": str(command_code_path), + "command": "python command.py", + "environment": "azureml:test_environment:1", + "inputs": {"command_input": {"type": "uri_folder"}}, + "name": "test_command", + "outputs": {}, + } + assert expected_yaml == yaml.safe_load(yaml_io) + + +def test_e2e_register(command: CommandRun): + """Tests e2e mock register.""" + component = command.register() + assert isinstance(component, CommandComponent) diff --git a/tests/cloud_runs/pipelines/test_pipeline.py b/tests/cloud_runs/pipelines/test_pipeline.py index 026e835..2909ad7 100644 --- a/tests/cloud_runs/pipelines/test_pipeline.py +++ b/tests/cloud_runs/pipelines/test_pipeline.py @@ -1,49 +1,8 @@ -import inspect -from typing import Callable -from unittest import mock +from azure.ai.ml.entities import PipelineJob +from ez_azml.cloud_runs import PipelineRun -from ez_azml.cloud_runs.pipelines import Pipeline - -def assert_function_wrapped(function: Callable, wrapped_function: Callable) -> None: - """Asserts that a function is properly wrapped. - - Args: - wrapped_function: The function that is potentially wrapped. - function: The original function to compare against after unwrapping. - - Raises: - AssertionError: If the function is already wrapped or if unwrapping the wrapped - function does not yield the original function. - """ - assert not hasattr(function, "__wrapped__"), "The function should not be wrapped." - assert ( - inspect.unwrap(wrapped_function) is function - ), "Unwrapping the wrapped function did not yield the original function." - - -def test_pipeline_register_components(pipeline: Pipeline): - """Tests that the components are properly decorated.""" - raw_functions = [c.function for c in pipeline.commands] - registered_commands = pipeline._register_components(pipeline.commands) - for raw_function, registered in zip(raw_functions, registered_commands): - assert_function_wrapped(raw_function, registered.function) - - -def test_pipeline_setup_dec_kwargs(pipeline: Pipeline): - """Tests that the pipeline decorator kwargs are properly postprocessed.""" - pipeline.dec_kwargs = pipeline._setup_dec_kwargs(pipeline.dec_kwargs) - - -def test_build_pipeline(pipeline: Pipeline): - """Tests that the pipeline is properly decorated.""" - - def f(): - pass - - with mock.patch.object(pipeline, "commands", return_value=f) as mocked: - mocked.__name__ = "command_fn" - - raw_pipeline = pipeline.pipeline - built_pipeline = pipeline._build_pipeline(raw_pipeline, pipeline.dec_kwargs) - assert_function_wrapped(raw_pipeline, built_pipeline) +def test_e2e_register(pipeline: PipelineRun): + """Tests e2e mock register.""" + component = pipeline.register() + assert isinstance(component, PipelineJob) diff --git a/tests/conftest.py b/tests/conftest.py index 8ff4003..0b47be6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,48 +1,86 @@ +from pathlib import Path from typing import Callable +from unittest import mock -import mldesigner as mld import pytest -from azure.ai.ml import Input, Output -from ez_azml.cloud_runs.pipelines import Pipeline, PipelineCommand +from azure.ai.ml import Input, MLClient, Output +from azure.ai.ml.entities import Environment +from ez_azml.cloud_runs import CommandRun, PipelineRun @pytest.fixture() -def command_fn() -> Callable: - """Function used in mldesigner command component.""" - - def command__fn( - command_input: mld.Input(type="uri_folder"), # type: ignore - command_output: mld.Output(type="uri_folder"), # type: ignore +def ml_client() -> MLClient: # type: ignore + """MLClient fixture.""" + mock_credential = mock.MagicMock() + ml_client = MLClient(credential=mock_credential) + with ( + mock.patch.object(ml_client.components, "create_or_update", return_value=True), + mock.patch.object(ml_client.jobs, "create_or_update", return_value=True), + mock.patch.object( + ml_client.environments, "create_or_update", return_value=True + ), ): - print("this is a test_fn") + yield ml_client + - return command__fn +@pytest.fixture() +def conda_env_file() -> Path: + """Path to a conda env file.""" + return Path(__file__).parent.parent / "configs/environments/conda.yaml" @pytest.fixture() -def pipeline_fn(command_fn) -> Callable: +def environment(conda_env_file: Path) -> Environment: + """Environment fixture.""" + return Environment( + image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04", + name="test_environment", + conda_file=str(conda_env_file), + ) + + +@pytest.fixture() +def command_code_path(tmp_path: Path) -> Path: + """Points to tmp file.""" + path = tmp_path / "test_command.py" + path.write_text(""" + print('this is a test command') + """) + return path + + +@pytest.fixture() +def command(ml_client: MLClient, command_code_path: Path, environment: Environment): + """CommandRun fixture.""" + inputs = {"command_input": Input()} + return CommandRun( + ml_client=ml_client, + code=command_code_path, + commands="python command.py", + environment=environment, + inputs=inputs, + ) + + +@pytest.fixture() +def pipeline_fn() -> Callable: """Function used in azure.ai.ml pipelines.""" def pipeline__fn( pipeline_input: Input, pipeline_output: Output, ): - command_fn(pipeline_input) + test_command(command_input=pipeline_input) # type: ignore # noqa F821 return pipeline__fn @pytest.fixture() -def pipeline_command(command_fn: Callable): - """PipelineCommand fixture.""" - return PipelineCommand(function=command_fn) - - -@pytest.fixture() -def pipeline(pipeline_fn: Callable, pipeline_command: PipelineCommand): - """Pipeline cloud run fixture.""" - return Pipeline( +def pipeline(ml_client: MLClient, pipeline_fn: Callable, command: CommandRun): + """PipelineRun cloud run fixture.""" + return PipelineRun( + ml_client=ml_client, experiment_name="fixture", - commands=[pipeline_command], + commands=[command], pipeline=pipeline_fn, ) diff --git a/tests/ml_client/test_ez_ml_client.py b/tests/ml_client/test_ez_ml_client.py new file mode 100644 index 0000000..36af8d5 --- /dev/null +++ b/tests/ml_client/test_ez_ml_client.py @@ -0,0 +1,20 @@ +from unittest import mock + +from ez_azml import EzMLClient +from ez_azml.env import ( + AZURE_RESOURCE_GROUP_NAME, + AZURE_SUBSCRIPTION_ID, + AZURE_WORKSPACE, +) + +from tests.utils import mock_os_getenv + + +@mock.patch("os.getenv", side_effect=mock_os_getenv) +def test_ml_client_envs(mock_os_getenv): + """Tests that init properly retrieves env variables.""" + client = EzMLClient() + + assert client.subscription_id == mock_os_getenv(AZURE_SUBSCRIPTION_ID) + assert client.resource_group_name == mock_os_getenv(AZURE_RESOURCE_GROUP_NAME) + assert client.workspace_name == mock_os_getenv(AZURE_WORKSPACE) diff --git a/tests/params/test_docker_params.py b/tests/params/test_docker_params.py new file mode 100644 index 0000000..8958b29 --- /dev/null +++ b/tests/params/test_docker_params.py @@ -0,0 +1,52 @@ +from unittest import mock + +import pytest +from ez_azml.params import DockerParams # Replace with the actual import path + +from tests.utils import mock_env_vars, mock_os_getenv + + +@pytest.mark.parametrize( + "kwargs", + [ + ({}), + ( + { + "image_name": "custom_image", + "registry": "custom_registry", + "tag": "custom_tag", + "username": "custom_username", + "password": "custom_password", + } + ), + ({"image_name": "custom_image", "registry": "custom_registry"}), + ({"image_name": "custom_image", "tag": "custom_tag"}), + ({"image_name": "custom_image", "registry": "", "tag": "custom_tag"}), + ], +) +@mock.patch("os.getenv", side_effect=mock_os_getenv) +def test_docker_params(mock_os_getenv, kwargs): + """Tests docker params constructor.""" + # Initialize DockerParams with the given kwargs + params = DockerParams(**kwargs) + + # Derive expected values based on kwargs or mock_env_vars + expected_image_name = kwargs.get("image_name", mock_env_vars["DOCKER_IMAGE"]) + expected_registry = kwargs.get("registry", mock_env_vars["DOCKER_REGISTRY"]) + expected_tag = kwargs.get("tag", mock_env_vars["DOCKER_TAG"]) + expected_username = kwargs.get("username", mock_env_vars["DOCKER_USERNAME"]) + expected_password = kwargs.get("password", mock_env_vars["DOCKER_PASSWORD"]) + + # Build the expected image string + if expected_registry: + expected_image = f"{expected_registry}/{expected_image_name}:{expected_tag}" + else: + expected_image = f"{expected_image_name}:{expected_tag}" + + # Assertions + assert params.image_name == expected_image_name + assert params.registry == expected_registry + assert params.tag == expected_tag + assert params.username == expected_username + assert params.password == expected_password + assert params.image == expected_image diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..b2a11df --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,29 @@ +# Mock environment variables +from ez_azml import env + +MOCK_DOCKER_IMAGE = "MOCK_DOCKER_IMAGE" +MOCK_DOCKER_REGISTRY = "MOCK_DOCKER_REGISTRY" +MOCK_DOCKER_TAG = "MOCK_DOCKER_TAG" +MOCK_DOCKER_USERNAME = "MOCK_DOCKER_USERNAME" +MOCK_DOCKER_PASSWORD = "MOCK_DOCKER_PASSWORD" # noqa: S105 +MOCK_AZURE_SUBSCRIPTION_ID = "MOCK_AZURE_SUBSCRIPTION_ID" +MOCK_AZURE_RESOURCE_GROUP_NAME = "MOCK_AZURE_RESOURCE_GROUP_NAME" +MOCK_AZURE_WORKSPACE = "MOCK_AZURE_WORKSPACE" + +# Rebuild the dictionary using the new variables +mock_env_vars = { + env.DOCKER_IMAGE: MOCK_DOCKER_IMAGE, + env.DOCKER_REGISTRY: MOCK_DOCKER_REGISTRY, + env.DOCKER_TAG: MOCK_DOCKER_TAG, + env.DOCKER_USERNAME: MOCK_DOCKER_USERNAME, + env.DOCKER_PASSWORD: MOCK_DOCKER_PASSWORD, + env.AZURE_SUBSCRIPTION_ID: MOCK_AZURE_SUBSCRIPTION_ID, + env.AZURE_RESOURCE_GROUP_NAME: MOCK_AZURE_RESOURCE_GROUP_NAME, + env.AZURE_WORKSPACE: MOCK_AZURE_RESOURCE_GROUP_NAME, +} + + +# Mock env.os_field function to return environment variables +def mock_os_getenv(env_var, default=None): + """Mocks getenv.""" + return mock_env_vars.get(env_var, default)