From 298630f5942f0f59fed15bdc9138410a41120d03 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Thu, 8 Aug 2024 14:31:42 +0200 Subject: [PATCH 1/7] modified example --- .env.sample | 3 +-- README.md | 4 ++-- configs/environments/conda.yaml | 6 +++--- configs/pipeline/main.yaml | 4 ++-- examples/pipelines.py | 8 ++++---- src/ez_azml/cloud_runs/pipelines/pipeline.py | 5 ++--- tests/conftest.py | 2 +- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.env.sample b/.env.sample index cca6cf9..909e568 100644 --- a/.env.sample +++ b/.env.sample @@ -6,6 +6,5 @@ AZURE_WORKSPACE= # Docker parameters DOCKER_REGISTRY= DOCKER_TAG= -## Gitlab tokens DOCKER_USERNAME= -DOCKER_PASSWORD= \ No newline at end of file +DOCKER_PASSWORD= diff --git a/README.md b/README.md index ea86c80..41e3ff3 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ It aims to: Simply run ```bash -pip install ezazml +pip install ez-azml ``` # Quickstart The main entrypoint is the CLI command. The following command will show you the help @@ -33,4 +33,4 @@ ez-azml --config configs/pytorch/main.yaml run ## Pipeline ``` ez-azml --config configs/pipeline/main.yaml run -``` \ No newline at end of file +``` diff --git a/configs/environments/conda.yaml b/configs/environments/conda.yaml index a9de33d..41294fa 100644 --- a/configs/environments/conda.yaml +++ b/configs/environments/conda.yaml @@ -2,10 +2,10 @@ name: default_environment channels: - defaults dependencies: - - python=3.8.12 + - python=3.9.12 - pip=21.2.2 - pip: - - mldesigner==0.1.0b17 + - mldesigner==0.1.0b18 - azure-ai-ml==1.18.0 - azureml-mlflow==1.56.0 - - ez-azml==0.1.0 + - ez-azml==0.1.1 diff --git a/configs/pipeline/main.yaml b/configs/pipeline/main.yaml index 1476989..88d5de1 100644 --- a/configs/pipeline/main.yaml +++ b/configs/pipeline/main.yaml @@ -3,10 +3,10 @@ cloud_run: init_args: experiment_name: "pipelines example" commands: - - function: examples.pipelines.test_fn + - function: examples.pipelines.ez_azml_fn environment: ../environments/pipeline.yaml - pipeline: examples.pipelines.test_pipeline + pipeline: examples.pipelines.ez_azml_pipeline compute: ../clusters/cpu_raw.yaml inputs: test_input: diff --git a/examples/pipelines.py b/examples/pipelines.py index 18dc72e..720e498 100644 --- a/examples/pipelines.py +++ b/examples/pipelines.py @@ -1,16 +1,16 @@ -def test_fn( +def ez_azml_fn( test_input: "Input(type='uri_file')", # type: ignore # noqa: F821 test_output: "Output(type='uri_folder')", # type: ignore # noqa: F821 ): """Test input function.""" out_path = f"{test_output}/out_file.txt" - with open(str(input)) as f_in, open(out_path, "w") as f_out: + with open(str(test_input)) as f_in, open(out_path, "w") as f_out: content = f_in.read() print(content) f_out.write(content) -def test_pipeline(test_input): +def ez_azml_pipeline(test_input): """Test pipeline function.""" - test_result = test_fn(test_input=test_input) + test_result = ez_azml_fn(test_input=test_input) return {"test_output": test_result.outputs.test_output} diff --git a/src/ez_azml/cloud_runs/pipelines/pipeline.py b/src/ez_azml/cloud_runs/pipelines/pipeline.py index b412376..8965db6 100644 --- a/src/ez_azml/cloud_runs/pipelines/pipeline.py +++ b/src/ez_azml/cloud_runs/pipelines/pipeline.py @@ -79,9 +79,8 @@ def _register_components( def _build_pipeline(self, pipeline: Callable, dec_kwargs: Optional[dict[str, Any]]): # Inject the decorated functions - for f in self.commands: - pipeline.__globals__[f.__name__] = f - f = pipeline_dec(**dec_kwargs)(pipeline) + for command in self.commands: + pipeline.__globals__[command.name] = command.function return pipeline_dec(**dec_kwargs)(pipeline) def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): diff --git a/tests/conftest.py b/tests/conftest.py index 8ff4003..63504e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,7 +14,7 @@ def command__fn( command_input: mld.Input(type="uri_folder"), # type: ignore command_output: mld.Output(type="uri_folder"), # type: ignore ): - print("this is a test_fn") + print("this is a test function") return command__fn From 8461d0ee66a767b08b9c48fb045b7ecea18023d5 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Fri, 9 Aug 2024 11:18:15 +0200 Subject: [PATCH 2/7] Working examples --- .github/workflows/python-publish.yml | 2 +- README.md | 7 +- configs/{pytorch => command}/main.yaml | 4 +- .../docker.yaml => dockers/pytorch.yaml} | 0 configs/pipeline/main.yaml | 32 +++++- examples/pipeline.py | 17 ++++ examples/pipelines.py | 16 --- src/ez_azml/cli/ez_azml.py | 7 ++ src/ez_azml/cloud_runs/__init__.py | 4 +- src/ez_azml/cloud_runs/cloud_run.py | 12 +++ src/ez_azml/cloud_runs/commands/__init__.py | 6 +- src/ez_azml/cloud_runs/commands/command.py | 82 +++++++++++++--- .../cloud_runs/commands/docker_command.py | 4 +- src/ez_azml/cloud_runs/pipelines/__init__.py | 4 +- src/ez_azml/cloud_runs/pipelines/pipeline.py | 97 +++++++------------ 15 files changed, 186 insertions(+), 108 deletions(-) rename configs/{pytorch => command}/main.yaml (88%) rename configs/{pytorch/docker.yaml => dockers/pytorch.yaml} (100%) create mode 100644 examples/pipeline.py delete mode 100644 examples/pipelines.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index fedcded..4c85341 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -23,5 +23,5 @@ jobs: - uses: pdm-project/setup-pdm@v4.1 - name: Publish package distributions to PyPI - run: + run: pdm publish diff --git a/README.md b/README.md index 41e3ff3..b5cdc78 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,13 @@ The main entrypoint is the CLI command. The following command will show you the ez-azml -h ``` -You can run an example with +You can run an example with (if you have cloned the repo) ## Command ``` -ez-azml --config configs/pytorch/main.yaml run +ez-azml --config configs/command/main.yaml run ``` ## Pipeline ``` -ez-azml --config configs/pipeline/main.yaml run +PYTHONPATH=$PYTHONPATH:. ez-azml --config configs/pipeline/main.yaml run ``` +(`PYTHONPATH` to point to the examples dir. This is not needed if your pipeline is available in any package as an importable function) diff --git a/configs/pytorch/main.yaml b/configs/command/main.yaml similarity index 88% rename from configs/pytorch/main.yaml rename to configs/command/main.yaml index 0b0a51d..471f1b1 100644 --- a/configs/pytorch/main.yaml +++ b/configs/command/main.yaml @@ -1,7 +1,7 @@ cloud_run: - class_path: ez_azml.cloud_runs.DockerCommand + class_path: ez_azml.cloud_runs.DockerCommandRun init_args: - docker: docker.yaml + docker: ../dockers/pytorch.yaml code: examples compute: ../clusters/cpu.yaml inputs: diff --git a/configs/pytorch/docker.yaml b/configs/dockers/pytorch.yaml similarity index 100% rename from configs/pytorch/docker.yaml rename to configs/dockers/pytorch.yaml diff --git a/configs/pipeline/main.yaml b/configs/pipeline/main.yaml index 88d5de1..cb1d8fa 100644 --- a/configs/pipeline/main.yaml +++ b/configs/pipeline/main.yaml @@ -3,10 +3,36 @@ cloud_run: init_args: experiment_name: "pipelines example" commands: - - function: examples.pipelines.ez_azml_fn - environment: ../environments/pipeline.yaml + torch: + class_path: ez_azml.cloud_runs.DockerCommandRun + init_args: + docker: ../dockers/pytorch.yaml + code: examples/pytorch.py + compute: ../clusters/cpu.yaml + inputs: + data_path: + class_path: azure.ai.ml.Input + init_args: + type: uri_folder + path: azureml://datastores/workspaceblobstore/paths/data + mode: ro_mount + outputs: + output_path: + class_path: azure.ai.ml.Output + init_args: + type: uri_folder + path: azureml://datastores/workspaceblobstore/paths/outputs + mode: rw_mount + commands: [ + echo 'This is an example', + python pytorch.py + ] + flags: [ + "--learning_rate 1e-6", + "--output_path ${{outputs.output_path}}" + ] - pipeline: examples.pipelines.ez_azml_pipeline + pipeline: examples.pipeline.ez_azml_pipeline compute: ../clusters/cpu_raw.yaml inputs: test_input: diff --git a/examples/pipeline.py b/examples/pipeline.py new file mode 100644 index 0000000..e459403 --- /dev/null +++ b/examples/pipeline.py @@ -0,0 +1,17 @@ +# def ez_azml_command_fn( +# test_input: "Input(type='uri_file')", # type: ignore +# test_output: "Output(type='uri_folder')", # type: ignore +# ): +# """Example command function.""" +# print("This is an example!") +# out_path = f"{test_output}/out_file.txt" +# with open(str(test_input)) as f_in, open(out_path, "w") as f_out: +# content = f_in.read() +# print(content) +# f_out.write(content) + + +def ez_azml_pipeline(test_input): + """Example pipeline function.""" + test_result = pytorch(data_path=test_input) # noqa F821 + return {"test_output": test_result.outputs.output_path} diff --git a/examples/pipelines.py b/examples/pipelines.py deleted file mode 100644 index 720e498..0000000 --- a/examples/pipelines.py +++ /dev/null @@ -1,16 +0,0 @@ -def ez_azml_fn( - test_input: "Input(type='uri_file')", # type: ignore # noqa: F821 - test_output: "Output(type='uri_folder')", # type: ignore # noqa: F821 -): - """Test input function.""" - out_path = f"{test_output}/out_file.txt" - with open(str(test_input)) as f_in, open(out_path, "w") as f_out: - content = f_in.read() - print(content) - f_out.write(content) - - -def ez_azml_pipeline(test_input): - """Test pipeline function.""" - test_result = ez_azml_fn(test_input=test_input) - return {"test_output": test_result.outputs.test_output} diff --git a/src/ez_azml/cli/ez_azml.py b/src/ez_azml/cli/ez_azml.py index 30bf491..dc2d97b 100644 --- a/src/ez_azml/cli/ez_azml.py +++ b/src/ez_azml/cli/ez_azml.py @@ -43,3 +43,10 @@ def run(self): output = self.cloud_run.run() logger.info(f"Run available at {output.url}") self.cloud_run.on_run_end(output) + + def register(self): + """Registers the cloud run as a reusable component.""" + self.cloud_run.on_register_start() + self.cloud_run.register() + self.cloud_run.on_register_end() + logger.info("Cloud run registered!") diff --git a/src/ez_azml/cloud_runs/__init__.py b/src/ez_azml/cloud_runs/__init__.py index faad41b..2faf956 100644 --- a/src/ez_azml/cloud_runs/__init__.py +++ b/src/ez_azml/cloud_runs/__init__.py @@ -1,4 +1,4 @@ -from .commands import Command, DockerCommand +from .commands import CommandRun, DockerCommandRun from .pipelines import Pipeline -__all__ = ["Command", "DockerCommand", "Pipeline"] +__all__ = ["CommandRun", "DockerCommandRun", "Pipeline"] diff --git a/src/ez_azml/cloud_runs/cloud_run.py b/src/ez_azml/cloud_runs/cloud_run.py index caa759a..c2d08a3 100644 --- a/src/ez_azml/cloud_runs/cloud_run.py +++ b/src/ez_azml/cloud_runs/cloud_run.py @@ -47,3 +47,15 @@ def run(self) -> RunOutput: def on_run_end(self, output: RunOutput): """Hook called once run has been submitted.""" return + + def on_register_start(self): + """Hook called before a component has been registered.""" + return + + @abstractmethod + def register(self): + """Registers the run as a reusable component.""" + + def on_register_end(self): + """Hook called after a component has been registered.""" + return diff --git a/src/ez_azml/cloud_runs/commands/__init__.py b/src/ez_azml/cloud_runs/commands/__init__.py index 2fd3604..a8a7bed 100644 --- a/src/ez_azml/cloud_runs/commands/__init__.py +++ b/src/ez_azml/cloud_runs/commands/__init__.py @@ -1,4 +1,4 @@ -from .command import Command -from .docker_command import DockerCommand +from .command import CommandRun +from .docker_command import DockerCommandRun -__all__ = ["DockerCommand", "Command"] +__all__ = ["DockerCommandRun", "CommandRun"] diff --git a/src/ez_azml/cloud_runs/commands/command.py b/src/ez_azml/cloud_runs/commands/command.py index 1f6dd36..aeabee1 100644 --- a/src/ez_azml/cloud_runs/commands/command.py +++ b/src/ez_azml/cloud_runs/commands/command.py @@ -1,16 +1,16 @@ -from typing import Optional +import io +from pathlib import Path +from typing import Optional, Union -from azure.ai.ml import command -from azure.ai.ml.entities import ( - UserIdentityConfiguration, - WorkspaceConnection, -) +import yaml +from azure.ai.ml import Input, MLClient, Output, command, load_component +from azure.ai.ml.entities import Command, UserIdentityConfiguration, WorkspaceConnection from typing_extensions import override from ez_azml.cloud_runs.cloud_run import CloudRun, RunOutput -class Command(CloudRun): +class CommandRun(CloudRun): """Cloud run that is based on AzureML Commands. Args: @@ -27,28 +27,84 @@ def __init__( ws_connection: Optional[WorkspaceConnection] = None, flags: Optional[list[str]] = None, identity: Optional[UserIdentityConfiguration] = None, + name: Optional[str] = None, **kwargs, ) -> None: super().__init__(**kwargs) self.ws_connection = ws_connection - identity = identity or UserIdentityConfiguration() + self.identity = identity or UserIdentityConfiguration() if flags: commands[-1] += " " + " ".join(flags) - self.job = command( - command=";".join(commands), - code=code, + self.commands = commands + self.code = Path(code) + self.name = name or self.code.stem + + @property + def cli_command(self) -> str: + """Actual cli command run on AzureML.""" + return ";".join(self.commands) + + @property + def command(self) -> Command: + """Runnable command.""" + return command( + command=self.cli_command, + code=self.code, environment=self.environment, compute=self.compute.name, inputs=self.inputs, outputs=self.outputs, - identity=identity, + identity=self.identity, ) + def _get_io_dict( + self, ios: dict[str, Union[Input, Output]], keys: Optional[list[str]] = None + ): + ios_as_dict = {} + keys = keys or ["type"] + for key, io_obj in ios.items(): + io_d = dict(io_obj) + ios_as_dict[key] = {k: io_d[k] for k in keys if io_d[k] is not None} + return ios_as_dict + + def _get_component_yaml_stream( + self, name: Optional[str] = None, environment: Optional[str] = None, **kwargs + ) -> io.StringIO: + inputs_dict = self._get_io_dict(self.inputs) + outputs_dict = self._get_io_dict(self.outputs) + if not environment: + version = self.environment.version or 1 + environment = f"azureml:{self.environment.name}:{version}" + yaml_dict = { + "name": name or self.name, + "inputs": inputs_dict, + "outputs": outputs_dict, + "code": str(self.code), + "command": self.cli_command, + "environment": environment, + **kwargs, + } + yaml_stream = io.StringIO() + yaml.dump(yaml_dict, yaml_stream) + yaml_stream.seek(0) # Move the file pointer to the beginning + return yaml_stream + + def _get_component(self, **kwargs): + yaml_file = self._get_component_yaml_stream(**kwargs) + return load_component(yaml_file) + + @override + def register(self, ml_client: Optional[MLClient] = None, **kwargs): + ml_client = ml_client or self.ml_client + component = self._get_component(**kwargs) + ml_client.components.create_or_update(component) + return component + @override def run(self) -> str: if self.ws_connection: self.ml_client.connections.create_or_update(self.ws_connection) self.ml_client.environments.create_or_update(self.environment) self.ml_client.begin_create_or_update(self.compute).result() - cloud_job = self.ml_client.create_or_update(self.job) + cloud_job = self.ml_client.create_or_update(self.command) return RunOutput(url=cloud_job.studio_url) diff --git a/src/ez_azml/cloud_runs/commands/docker_command.py b/src/ez_azml/cloud_runs/commands/docker_command.py index f11885e..abdecec 100644 --- a/src/ez_azml/cloud_runs/commands/docker_command.py +++ b/src/ez_azml/cloud_runs/commands/docker_command.py @@ -1,10 +1,10 @@ from ez_azml.entities import DockerEnvironment, DockerWorkspaceConnection from ez_azml.params import DockerParams -from .command import Command +from .command import CommandRun -class DockerCommand(Command): +class DockerCommandRun(CommandRun): """A Command that uses a docker image. Args: diff --git a/src/ez_azml/cloud_runs/pipelines/__init__.py b/src/ez_azml/cloud_runs/pipelines/__init__.py index 2c6c5bb..d2381c3 100644 --- a/src/ez_azml/cloud_runs/pipelines/__init__.py +++ b/src/ez_azml/cloud_runs/pipelines/__init__.py @@ -1,3 +1,3 @@ -from .pipeline import Pipeline, PipelineCommand +from .pipeline import Pipeline -__all__ = ["Pipeline", "PipelineCommand"] +__all__ = ["Pipeline"] diff --git a/src/ez_azml/cloud_runs/pipelines/pipeline.py b/src/ez_azml/cloud_runs/pipelines/pipeline.py index 8965db6..aec1e00 100644 --- a/src/ez_azml/cloud_runs/pipelines/pipeline.py +++ b/src/ez_azml/cloud_runs/pipelines/pipeline.py @@ -1,36 +1,11 @@ -import re -from dataclasses import dataclass, field from typing import Any, Callable, Optional -import mldesigner as mld from azure.ai.ml.dsl import pipeline as pipeline_dec -from azure.ai.ml.entities import Environment +from azure.ai.ml.entities import Component from typing_extensions import override from ez_azml.cloud_runs.cloud_run import CloudRun, RunOutput - - -@dataclass -class PipelineCommand: - """Class representing a mldesigner Azureml step (mldesigner.command_component). - - Args: - function: function to be decorated - environment: enviroment to run the comand at - component_kwargs: kwargs to pass to the `mldesigner.command_component` decorator - """ - - function: Callable - environment: Optional[Environment] = None - component_kwargs: Optional[dict[str, Any]] = field(default_factory=dict) - - def __post_init__(self): - self.component_kwargs["environment"] = self.environment - - @property - def name(self): - """Command's name.""" - return self.function.__name__ +from ez_azml.cloud_runs.commands import CommandRun class Pipeline(CloudRun): @@ -46,42 +21,37 @@ class Pipeline(CloudRun): def __init__( self, experiment_name: str, - commands: list[PipelineCommand], + commands: dict[str, CommandRun], pipeline: Callable, dec_kwargs: Optional[dict[str, Any]] = None, **kwargs, ) -> None: super().__init__(**kwargs) self.experiment_name = experiment_name + if isinstance(commands, CommandRun): + commands = [commands] + if isinstance(commands, list): + commands = {command.name: command for command in commands} self.commands = commands self.dec_kwargs = dec_kwargs or {} self.pipeline = pipeline - def _register_components( - self, commands: list[PipelineCommand] - ) -> list[PipelineCommand]: - kwargs_pattern = r"(\w+)\s*=\s*(['\"]?)(\w+)\2" - for command in commands: - fn = command.function - for parameter in fn.__annotations__: - hint = fn.__annotations__[parameter] - if isinstance(hint, str): - potential_kwargs = { - key: value for key, _, value in re.findall(kwargs_pattern, hint) - } - if "Input(" in hint: - hint = mld.Input(**potential_kwargs) - elif "Output(" in hint: - hint = mld.Output(**potential_kwargs) - fn.__annotations__[parameter] = hint - command.function = mld.command_component(**command.component_kwargs)(fn) - return commands + def _register_components(self, commands: dict[str, CommandRun]) -> list[Component]: + return [command.register(self.ml_client) for command in commands.values()] - def _build_pipeline(self, pipeline: Callable, dec_kwargs: Optional[dict[str, Any]]): - # Inject the decorated functions - for command in self.commands: - pipeline.__globals__[command.name] = command.function - return pipeline_dec(**dec_kwargs)(pipeline) + def _build_pipeline( + self, + pipeline: Callable, + dec_kwargs: Optional[dict[str, Any]], + components: list[Component], + ): + # Inject the components + for component in components: + pipeline.__globals__[component.name] = component + dec_pipeline = pipeline_dec(**dec_kwargs)(pipeline) + # for component in components: + # dec_pipeline.__globals__[component.name] = component + return dec_pipeline def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): if self.compute: @@ -89,21 +59,26 @@ def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): "default_compute", self.compute.name ) if self.environment: - dec_kwargs["default_compute"] = dec_kwargs.get( - "default_compute", self.compute.name - ) + # TODO: check correct key + dec_kwargs["environment"] = dec_kwargs.get("environment", self.environment) return dec_kwargs + @override + def register(self): + return self.ml_client.jobs.create_or_update( + self.pipeline_job, experiment_name=self.experiment_name + ) + @override def on_run_start(self): - self.commands = self._register_components(self.commands) + self.components = self._register_components(self.commands) self.dec_kwargs = self._setup_dec_kwargs(self.dec_kwargs) - self.pipeline = self._build_pipeline(self.pipeline, self.dec_kwargs) + self.pipeline = self._build_pipeline( + self.pipeline, self.dec_kwargs, self.components + ) @override def run(self) -> RunOutput: - pipeline_job = self.pipeline(**self.inputs, **self.outputs) - pipe = self.ml_client.jobs.create_or_update( - pipeline_job, experiment_name=self.experiment_name - ) + self.pipeline_job = self.pipeline(**self.inputs, **self.outputs) + pipe = self.register() return RunOutput(url=pipe.studio_url) From 3718f33138ff047ee09147117ce8f749e747d4fe Mon Sep 17 00:00:00 2001 From: Alejandro Date: Fri, 9 Aug 2024 12:52:18 +0200 Subject: [PATCH 3/7] first wave of test refactor --- README.md | 2 +- configs/command/main.yaml | 2 +- configs/pipeline/main.yaml | 9 +++-- examples/pipeline.py | 16 +------- examples/{pytorch.py => pytorch_script.py} | 0 src/ez_azml/cloud_runs/__init__.py | 4 +- src/ez_azml/cloud_runs/commands/command.py | 17 ++++++--- src/ez_azml/cloud_runs/pipelines/__init__.py | 4 +- src/ez_azml/cloud_runs/pipelines/pipeline.py | 30 ++++++++------- tests/cloud_runs/pipelines/test_pipeline.py | 30 ++++----------- tests/conftest.py | 39 +++++++++----------- 11 files changed, 68 insertions(+), 85 deletions(-) rename examples/{pytorch.py => pytorch_script.py} (100%) diff --git a/README.md b/README.md index b5cdc78..4611b53 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ You can run an example with (if you have cloned the repo) ``` ez-azml --config configs/command/main.yaml run ``` -## Pipeline +## PipelineRun ``` PYTHONPATH=$PYTHONPATH:. ez-azml --config configs/pipeline/main.yaml run ``` diff --git a/configs/command/main.yaml b/configs/command/main.yaml index 471f1b1..b9aaea9 100644 --- a/configs/command/main.yaml +++ b/configs/command/main.yaml @@ -20,7 +20,7 @@ cloud_run: mode: rw_mount commands: [ echo 'This is an example', - python pytorch.py + python pytorch_script.py ] flags: [ "--learning_rate 1e-6", diff --git a/configs/pipeline/main.yaml b/configs/pipeline/main.yaml index cb1d8fa..e893ab6 100644 --- a/configs/pipeline/main.yaml +++ b/configs/pipeline/main.yaml @@ -1,5 +1,5 @@ cloud_run: - class_path: ez_azml.cloud_runs.Pipeline + class_path: ez_azml.cloud_runs.PipelineRun init_args: experiment_name: "pipelines example" commands: @@ -7,7 +7,7 @@ cloud_run: class_path: ez_azml.cloud_runs.DockerCommandRun init_args: docker: ../dockers/pytorch.yaml - code: examples/pytorch.py + code: examples/pytorch_script.py compute: ../clusters/cpu.yaml inputs: data_path: @@ -25,12 +25,15 @@ cloud_run: mode: rw_mount commands: [ echo 'This is an example', - python pytorch.py + python pytorch_script.py ] flags: [ "--learning_rate 1e-6", "--output_path ${{outputs.output_path}}" + #"The input is ignored, could be used as ${{inputs.data_path}} through flags" ] + register_kwargs: + version: 2 pipeline: examples.pipeline.ez_azml_pipeline compute: ../clusters/cpu_raw.yaml diff --git a/examples/pipeline.py b/examples/pipeline.py index e459403..82d39e2 100644 --- a/examples/pipeline.py +++ b/examples/pipeline.py @@ -1,17 +1,5 @@ -# def ez_azml_command_fn( -# test_input: "Input(type='uri_file')", # type: ignore -# test_output: "Output(type='uri_folder')", # type: ignore -# ): -# """Example command function.""" -# print("This is an example!") -# out_path = f"{test_output}/out_file.txt" -# with open(str(test_input)) as f_in, open(out_path, "w") as f_out: -# content = f_in.read() -# print(content) -# f_out.write(content) - - def ez_azml_pipeline(test_input): """Example pipeline function.""" - test_result = pytorch(data_path=test_input) # noqa F821 + # pytorch_script is the name of the registered component + test_result = pytorch_script(data_path=test_input) # noqa F821 return {"test_output": test_result.outputs.output_path} diff --git a/examples/pytorch.py b/examples/pytorch_script.py similarity index 100% rename from examples/pytorch.py rename to examples/pytorch_script.py diff --git a/src/ez_azml/cloud_runs/__init__.py b/src/ez_azml/cloud_runs/__init__.py index 2faf956..1fe11e4 100644 --- a/src/ez_azml/cloud_runs/__init__.py +++ b/src/ez_azml/cloud_runs/__init__.py @@ -1,4 +1,4 @@ from .commands import CommandRun, DockerCommandRun -from .pipelines import Pipeline +from .pipelines import PipelineRun -__all__ = ["CommandRun", "DockerCommandRun", "Pipeline"] +__all__ = ["CommandRun", "DockerCommandRun", "PipelineRun"] diff --git a/src/ez_azml/cloud_runs/commands/command.py b/src/ez_azml/cloud_runs/commands/command.py index aeabee1..734fb65 100644 --- a/src/ez_azml/cloud_runs/commands/command.py +++ b/src/ez_azml/cloud_runs/commands/command.py @@ -1,6 +1,6 @@ import io from pathlib import Path -from typing import Optional, Union +from typing import Any, Optional, Union import yaml from azure.ai.ml import Input, MLClient, Output, command, load_component @@ -22,22 +22,26 @@ class CommandRun(CloudRun): def __init__( self, - code: str, - commands: list[str], + code: Union[str, Path], + commands: Union[str, list[str]], ws_connection: Optional[WorkspaceConnection] = None, flags: Optional[list[str]] = None, identity: Optional[UserIdentityConfiguration] = None, name: Optional[str] = None, + register_kwargs: Optional[dict[str, Any]] = None, **kwargs, ) -> None: super().__init__(**kwargs) self.ws_connection = ws_connection self.identity = identity or UserIdentityConfiguration() + if isinstance(commands, str): + commands = [commands] if flags: commands[-1] += " " + " ".join(flags) self.commands = commands self.code = Path(code) self.name = name or self.code.stem + self.register_kwargs = register_kwargs or {} @property def cli_command(self) -> str: @@ -89,14 +93,15 @@ def _get_component_yaml_stream( yaml_stream.seek(0) # Move the file pointer to the beginning return yaml_stream - def _get_component(self, **kwargs): - yaml_file = self._get_component_yaml_stream(**kwargs) + def get_component(self, **kwargs): + """Returns the mldesigner component.""" + yaml_file = self._get_component_yaml_stream(**self.register_kwargs, **kwargs) return load_component(yaml_file) @override def register(self, ml_client: Optional[MLClient] = None, **kwargs): ml_client = ml_client or self.ml_client - component = self._get_component(**kwargs) + component = self.get_component(**kwargs) ml_client.components.create_or_update(component) return component diff --git a/src/ez_azml/cloud_runs/pipelines/__init__.py b/src/ez_azml/cloud_runs/pipelines/__init__.py index d2381c3..be769de 100644 --- a/src/ez_azml/cloud_runs/pipelines/__init__.py +++ b/src/ez_azml/cloud_runs/pipelines/__init__.py @@ -1,3 +1,3 @@ -from .pipeline import Pipeline +from .pipeline import PipelineRun -__all__ = ["Pipeline"] +__all__ = ["PipelineRun"] diff --git a/src/ez_azml/cloud_runs/pipelines/pipeline.py b/src/ez_azml/cloud_runs/pipelines/pipeline.py index aec1e00..2500bbc 100644 --- a/src/ez_azml/cloud_runs/pipelines/pipeline.py +++ b/src/ez_azml/cloud_runs/pipelines/pipeline.py @@ -8,7 +8,7 @@ from ez_azml.cloud_runs.commands import CommandRun -class Pipeline(CloudRun): +class PipelineRun(CloudRun): """Cloud run that uses mldesigner pipelines. Args: @@ -24,6 +24,7 @@ def __init__( commands: dict[str, CommandRun], pipeline: Callable, dec_kwargs: Optional[dict[str, Any]] = None, + register_components: bool = False, **kwargs, ) -> None: super().__init__(**kwargs) @@ -35,9 +36,17 @@ def __init__( self.commands = commands self.dec_kwargs = dec_kwargs or {} self.pipeline = pipeline + self.register_components = register_components - def _register_components(self, commands: dict[str, CommandRun]) -> list[Component]: - return [command.register(self.ml_client) for command in commands.values()] + def _get_command_components( + self, commands: dict[str, CommandRun] + ) -> list[Component]: + return [ + command.register(self.ml_client) + if self.register_components + else command.get_component() + for command in commands.values() + ] def _build_pipeline( self, @@ -49,8 +58,6 @@ def _build_pipeline( for component in components: pipeline.__globals__[component.name] = component dec_pipeline = pipeline_dec(**dec_kwargs)(pipeline) - # for component in components: - # dec_pipeline.__globals__[component.name] = component return dec_pipeline def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): @@ -65,20 +72,17 @@ def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): @override def register(self): - return self.ml_client.jobs.create_or_update( - self.pipeline_job, experiment_name=self.experiment_name - ) - - @override - def on_run_start(self): - self.components = self._register_components(self.commands) + self.components = self._get_command_components(self.commands) self.dec_kwargs = self._setup_dec_kwargs(self.dec_kwargs) self.pipeline = self._build_pipeline( self.pipeline, self.dec_kwargs, self.components ) + self.pipeline_component = self.pipeline(**self.inputs, **self.outputs) + return self.ml_client.jobs.create_or_update( + self.pipeline_component, experiment_name=self.experiment_name + ) @override def run(self) -> RunOutput: - self.pipeline_job = self.pipeline(**self.inputs, **self.outputs) pipe = self.register() return RunOutput(url=pipe.studio_url) diff --git a/tests/cloud_runs/pipelines/test_pipeline.py b/tests/cloud_runs/pipelines/test_pipeline.py index 026e835..c2663fa 100644 --- a/tests/cloud_runs/pipelines/test_pipeline.py +++ b/tests/cloud_runs/pipelines/test_pipeline.py @@ -2,7 +2,7 @@ from typing import Callable from unittest import mock -from ez_azml.cloud_runs.pipelines import Pipeline +from ez_azml.cloud_runs.pipelines import PipelineRun def assert_function_wrapped(function: Callable, wrapped_function: Callable) -> None: @@ -22,28 +22,14 @@ def assert_function_wrapped(function: Callable, wrapped_function: Callable) -> N ), "Unwrapping the wrapped function did not yield the original function." -def test_pipeline_register_components(pipeline: Pipeline): +def test_pipeline_register(pipeline: PipelineRun): """Tests that the components are properly decorated.""" - raw_functions = [c.function for c in pipeline.commands] - registered_commands = pipeline._register_components(pipeline.commands) - for raw_function, registered in zip(raw_functions, registered_commands): - assert_function_wrapped(raw_function, registered.function) - -def test_pipeline_setup_dec_kwargs(pipeline: Pipeline): - """Tests that the pipeline decorator kwargs are properly postprocessed.""" - pipeline.dec_kwargs = pipeline._setup_dec_kwargs(pipeline.dec_kwargs) - - -def test_build_pipeline(pipeline: Pipeline): - """Tests that the pipeline is properly decorated.""" - - def f(): + def mock_create(*args, **kwargs): pass - with mock.patch.object(pipeline, "commands", return_value=f) as mocked: - mocked.__name__ = "command_fn" - - raw_pipeline = pipeline.pipeline - built_pipeline = pipeline._build_pipeline(raw_pipeline, pipeline.dec_kwargs) - assert_function_wrapped(raw_pipeline, built_pipeline) + with mock.patch.object( + pipeline, "ml_client.jobs.create_or_update", return_value=mock_create + ) as mocked: + pipeline.register() + print(mocked) diff --git a/tests/conftest.py b/tests/conftest.py index 63504e8..c12be4e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,22 +1,25 @@ +from pathlib import Path from typing import Callable -import mldesigner as mld import pytest from azure.ai.ml import Input, Output -from ez_azml.cloud_runs.pipelines import Pipeline, PipelineCommand +from ez_azml.cloud_runs import CommandRun, PipelineRun @pytest.fixture() -def command_fn() -> Callable: - """Function used in mldesigner command component.""" +def command_code_path(tmp_path: Path) -> Path: + """Points to tmp file.""" + path = tmp_path / "test_command.py" + path.write_text(""" + print('this is a test command') + """) + return path - def command__fn( - command_input: mld.Input(type="uri_folder"), # type: ignore - command_output: mld.Output(type="uri_folder"), # type: ignore - ): - print("this is a test function") - return command__fn +@pytest.fixture() +def command(command_code_path: Path): + """CommandRun fixture.""" + return CommandRun(code=command_code_path, commands="python command.py") @pytest.fixture() @@ -27,22 +30,16 @@ def pipeline__fn( pipeline_input: Input, pipeline_output: Output, ): - command_fn(pipeline_input) + test_command(pipeline_input) # noqa F821 return pipeline__fn @pytest.fixture() -def pipeline_command(command_fn: Callable): - """PipelineCommand fixture.""" - return PipelineCommand(function=command_fn) - - -@pytest.fixture() -def pipeline(pipeline_fn: Callable, pipeline_command: PipelineCommand): - """Pipeline cloud run fixture.""" - return Pipeline( +def pipeline(pipeline_fn: Callable, command: CommandRun): + """PipelineRun cloud run fixture.""" + return PipelineRun( experiment_name="fixture", - commands=[pipeline_command], + commands=[command], pipeline=pipeline_fn, ) From 2b2ad07d2a51064e135ec07d948e3b08b256c5cc Mon Sep 17 00:00:00 2001 From: Alejandro Date: Fri, 9 Aug 2024 19:48:28 +0200 Subject: [PATCH 4/7] refactored tests --- configs/environments/conda.yaml | 5 -- configs/pipeline/main.yaml | 18 +++++++ examples/pipeline.py | 1 + examples/print_output.py | 11 ++++ src/ez_azml/cloud_runs/commands/command.py | 6 ++- src/ez_azml/cloud_runs/pipelines/pipeline.py | 10 ++-- tests/cloud_runs/commands/test_commands.py | 25 +++++++++ tests/cloud_runs/pipelines/test_pipeline.py | 39 +++----------- tests/conftest.py | 53 +++++++++++++++++--- 9 files changed, 119 insertions(+), 49 deletions(-) create mode 100644 examples/print_output.py create mode 100644 tests/cloud_runs/commands/test_commands.py diff --git a/configs/environments/conda.yaml b/configs/environments/conda.yaml index 41294fa..55c1e62 100644 --- a/configs/environments/conda.yaml +++ b/configs/environments/conda.yaml @@ -4,8 +4,3 @@ channels: dependencies: - python=3.9.12 - pip=21.2.2 - - pip: - - mldesigner==0.1.0b18 - - azure-ai-ml==1.18.0 - - azureml-mlflow==1.56.0 - - ez-azml==0.1.1 diff --git a/configs/pipeline/main.yaml b/configs/pipeline/main.yaml index e893ab6..1adb27f 100644 --- a/configs/pipeline/main.yaml +++ b/configs/pipeline/main.yaml @@ -34,6 +34,24 @@ cloud_run: ] register_kwargs: version: 2 + command: + class_path: ez_azml.cloud_runs.CommandRun + init_args: + name: print_output + compute: ../clusters/cpu.yaml + code: examples/print_output.py + environment: ../environments/pipeline.yaml + inputs: + prev_output: + class_path: azure.ai.ml.Input + init_args: + type: uri_folder + commands: [ + echo 'This is an print example', + "python print_output.py --path ${{inputs.prev_output}}" + ] + register_kwargs: + version: 2 pipeline: examples.pipeline.ez_azml_pipeline compute: ../clusters/cpu_raw.yaml diff --git a/examples/pipeline.py b/examples/pipeline.py index 82d39e2..14c61fc 100644 --- a/examples/pipeline.py +++ b/examples/pipeline.py @@ -2,4 +2,5 @@ def ez_azml_pipeline(test_input): """Example pipeline function.""" # pytorch_script is the name of the registered component test_result = pytorch_script(data_path=test_input) # noqa F821 + print_output(prev_output=test_result.outputs.output_path) # noqa F821 return {"test_output": test_result.outputs.output_path} diff --git a/examples/print_output.py b/examples/print_output.py new file mode 100644 index 0000000..f911e0b --- /dev/null +++ b/examples/print_output.py @@ -0,0 +1,11 @@ +import argparse +from pathlib import Path + +# Argument parsing +parser = argparse.ArgumentParser(description="Print input") +parser.add_argument("--path", type=Path, default="outputs", help="output to read") +args = parser.parse_args() +path: Path = args.path +for file in path.glob("*.txt"): + content = (path / file).read_text() + print(content) diff --git a/src/ez_azml/cloud_runs/commands/command.py b/src/ez_azml/cloud_runs/commands/command.py index 734fb65..da49322 100644 --- a/src/ez_azml/cloud_runs/commands/command.py +++ b/src/ez_azml/cloud_runs/commands/command.py @@ -14,10 +14,12 @@ class CommandRun(CloudRun): """Cloud run that is based on AzureML Commands. Args: - code: location of the scripts to use + code: location of the python scripts to use commands: commands to run on the cloud (e.g. `python my_script.py`) flags: flags to use with the last command. identity: credentials to use. + name: command's name. + register_kwargs: kwargs to use when registering component. """ def __init__( @@ -77,6 +79,7 @@ def _get_component_yaml_stream( inputs_dict = self._get_io_dict(self.inputs) outputs_dict = self._get_io_dict(self.outputs) if not environment: + self.ml_client.environments.create_or_update(self.environment) version = self.environment.version or 1 environment = f"azureml:{self.environment.name}:{version}" yaml_dict = { @@ -109,7 +112,6 @@ def register(self, ml_client: Optional[MLClient] = None, **kwargs): def run(self) -> str: if self.ws_connection: self.ml_client.connections.create_or_update(self.ws_connection) - self.ml_client.environments.create_or_update(self.environment) self.ml_client.begin_create_or_update(self.compute).result() cloud_job = self.ml_client.create_or_update(self.command) return RunOutput(url=cloud_job.studio_url) diff --git a/src/ez_azml/cloud_runs/pipelines/pipeline.py b/src/ez_azml/cloud_runs/pipelines/pipeline.py index 2500bbc..934ca0e 100644 --- a/src/ez_azml/cloud_runs/pipelines/pipeline.py +++ b/src/ez_azml/cloud_runs/pipelines/pipeline.py @@ -1,7 +1,7 @@ from typing import Any, Callable, Optional from azure.ai.ml.dsl import pipeline as pipeline_dec -from azure.ai.ml.entities import Component +from azure.ai.ml.entities import Component, PipelineJob from typing_extensions import override from ez_azml.cloud_runs.cloud_run import CloudRun, RunOutput @@ -71,16 +71,20 @@ def _setup_dec_kwargs(self, dec_kwargs: dict[str, Any]): return dec_kwargs @override - def register(self): + def register(self) -> PipelineJob: + for command in self.commands.values(): + command.ml_client = command.ml_client or self.ml_client self.components = self._get_command_components(self.commands) self.dec_kwargs = self._setup_dec_kwargs(self.dec_kwargs) self.pipeline = self._build_pipeline( self.pipeline, self.dec_kwargs, self.components ) self.pipeline_component = self.pipeline(**self.inputs, **self.outputs) - return self.ml_client.jobs.create_or_update( + # TODO: add tenacity + self.ml_client.jobs.create_or_update( self.pipeline_component, experiment_name=self.experiment_name ) + return self.pipeline_component @override def run(self) -> RunOutput: diff --git a/tests/cloud_runs/commands/test_commands.py b/tests/cloud_runs/commands/test_commands.py new file mode 100644 index 0000000..93b4239 --- /dev/null +++ b/tests/cloud_runs/commands/test_commands.py @@ -0,0 +1,25 @@ +from pathlib import Path + +import yaml +from azure.ai.ml.entities import CommandComponent +from ez_azml.cloud_runs import CommandRun + + +def test_get_component_yaml(command: CommandRun, command_code_path: Path): + """Tests that the yaml is properly generated.""" + yaml_io = command._get_component_yaml_stream(**command.register_kwargs) + expected_yaml = { + "code": str(command_code_path), + "command": "python command.py", + "environment": "azureml:test_environment:1", + "inputs": {"command_input": {"type": "uri_folder"}}, + "name": "test_command", + "outputs": {}, + } + assert expected_yaml == yaml.safe_load(yaml_io) + + +def test_e2e_register(command: CommandRun): + """Tests e2e mock register.""" + component = command.register() + assert isinstance(component, CommandComponent) diff --git a/tests/cloud_runs/pipelines/test_pipeline.py b/tests/cloud_runs/pipelines/test_pipeline.py index c2663fa..2909ad7 100644 --- a/tests/cloud_runs/pipelines/test_pipeline.py +++ b/tests/cloud_runs/pipelines/test_pipeline.py @@ -1,35 +1,8 @@ -import inspect -from typing import Callable -from unittest import mock +from azure.ai.ml.entities import PipelineJob +from ez_azml.cloud_runs import PipelineRun -from ez_azml.cloud_runs.pipelines import PipelineRun - -def assert_function_wrapped(function: Callable, wrapped_function: Callable) -> None: - """Asserts that a function is properly wrapped. - - Args: - wrapped_function: The function that is potentially wrapped. - function: The original function to compare against after unwrapping. - - Raises: - AssertionError: If the function is already wrapped or if unwrapping the wrapped - function does not yield the original function. - """ - assert not hasattr(function, "__wrapped__"), "The function should not be wrapped." - assert ( - inspect.unwrap(wrapped_function) is function - ), "Unwrapping the wrapped function did not yield the original function." - - -def test_pipeline_register(pipeline: PipelineRun): - """Tests that the components are properly decorated.""" - - def mock_create(*args, **kwargs): - pass - - with mock.patch.object( - pipeline, "ml_client.jobs.create_or_update", return_value=mock_create - ) as mocked: - pipeline.register() - print(mocked) +def test_e2e_register(pipeline: PipelineRun): + """Tests e2e mock register.""" + component = pipeline.register() + assert isinstance(component, PipelineJob) diff --git a/tests/conftest.py b/tests/conftest.py index c12be4e..6ce6266 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,44 @@ from pathlib import Path from typing import Callable +from unittest import mock import pytest -from azure.ai.ml import Input, Output +from azure.ai.ml import Input, MLClient, Output +from azure.ai.ml.entities import Environment from ez_azml.cloud_runs import CommandRun, PipelineRun +@pytest.fixture() +def ml_client() -> MLClient: # type: ignore + """MLClient fixture.""" + mock_credential = mock.MagicMock() + ml_client = MLClient(credential=mock_credential) + with ( + mock.patch.object(ml_client.components, "create_or_update", return_value=True), + mock.patch.object(ml_client.jobs, "create_or_update", return_value=True), + mock.patch.object( + ml_client.environments, "create_or_update", return_value=True + ), + ): + yield ml_client + + +@pytest.fixture() +def conda_env_file() -> Path: + """Path to a conda env file.""" + return Path(__file__).parent.parent / "configs/environments/conda.yaml" + + +@pytest.fixture() +def environment(conda_env_file: Path) -> Environment: + """Environment fixture.""" + return Environment( + image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04", + name="test_environment", + conda_file=str(conda_env_file), + ) + + @pytest.fixture() def command_code_path(tmp_path: Path) -> Path: """Points to tmp file.""" @@ -17,28 +50,36 @@ def command_code_path(tmp_path: Path) -> Path: @pytest.fixture() -def command(command_code_path: Path): +def command(ml_client: MLClient, command_code_path: Path, environment: Environment): """CommandRun fixture.""" - return CommandRun(code=command_code_path, commands="python command.py") + inputs = {"command_input": Input()} + return CommandRun( + ml_client=ml_client, + code=command_code_path, + commands="python command.py", + environment=environment, + inputs=inputs, + ) @pytest.fixture() -def pipeline_fn(command_fn) -> Callable: +def pipeline_fn() -> Callable: """Function used in azure.ai.ml pipelines.""" def pipeline__fn( pipeline_input: Input, pipeline_output: Output, ): - test_command(pipeline_input) # noqa F821 + test_command(command_input=pipeline_input) # noqa F821 return pipeline__fn @pytest.fixture() -def pipeline(pipeline_fn: Callable, command: CommandRun): +def pipeline(ml_client: MLClient, pipeline_fn: Callable, command: CommandRun): """PipelineRun cloud run fixture.""" return PipelineRun( + ml_client=ml_client, experiment_name="fixture", commands=[command], pipeline=pipeline_fn, From dcb2017accdea46bb395fb3002949fac4ff5dcca Mon Sep 17 00:00:00 2001 From: Alejandro Date: Fri, 9 Aug 2024 19:49:08 +0200 Subject: [PATCH 5/7] refactored tests --- examples/pipeline.py | 4 ++-- tests/conftest.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/pipeline.py b/examples/pipeline.py index 14c61fc..fd2baeb 100644 --- a/examples/pipeline.py +++ b/examples/pipeline.py @@ -1,6 +1,6 @@ def ez_azml_pipeline(test_input): """Example pipeline function.""" # pytorch_script is the name of the registered component - test_result = pytorch_script(data_path=test_input) # noqa F821 - print_output(prev_output=test_result.outputs.output_path) # noqa F821 + test_result = pytorch_script(data_path=test_input) # type: ignore # noqa F821 + print_output(prev_output=test_result.outputs.output_path) # type: ignore # noqa F821 return {"test_output": test_result.outputs.output_path} diff --git a/tests/conftest.py b/tests/conftest.py index 6ce6266..0b47be6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,7 +70,7 @@ def pipeline__fn( pipeline_input: Input, pipeline_output: Output, ): - test_command(command_input=pipeline_input) # noqa F821 + test_command(command_input=pipeline_input) # type: ignore # noqa F821 return pipeline__fn From 6b8a0a8691f1184d5b804f172a5dd4eacead80fc Mon Sep 17 00:00:00 2001 From: Alejandro Date: Fri, 9 Aug 2024 20:19:47 +0200 Subject: [PATCH 6/7] working docker param tests --- tests/ml_client/test_ez_ml_client.py | 0 tests/params/test_docker_params.py | 65 ++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/ml_client/test_ez_ml_client.py create mode 100644 tests/params/test_docker_params.py diff --git a/tests/ml_client/test_ez_ml_client.py b/tests/ml_client/test_ez_ml_client.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/params/test_docker_params.py b/tests/params/test_docker_params.py new file mode 100644 index 0000000..32a1e49 --- /dev/null +++ b/tests/params/test_docker_params.py @@ -0,0 +1,65 @@ +from unittest import mock + +import pytest +from ez_azml.params import DockerParams # Replace with the actual import path + +# Mock environment variables +mock_env_vars = { + "DOCKER_IMAGE": "mock_image", + "DOCKER_REGISTRY": "mock_registry", + "DOCKER_TAG": "mock_tag", + "DOCKER_USERNAME": "mock_username", + "DOCKER_PASSWORD": "mock_password", +} + + +# Mock env.os_field function to return environment variables +def mock_os_getenv(env_var, default=None): + """Mocks getenv.""" + return mock_env_vars.get(env_var, default) + + +@pytest.mark.parametrize( + "kwargs", + [ + ({}), + ( + { + "image_name": "custom_image", + "registry": "custom_registry", + "tag": "custom_tag", + "username": "custom_username", + "password": "custom_password", + } + ), + ({"image_name": "custom_image", "registry": "custom_registry"}), + ({"image_name": "custom_image", "tag": "custom_tag"}), + ({"image_name": "custom_image", "registry": "", "tag": "custom_tag"}), + ], +) +@mock.patch("os.getenv", side_effect=mock_os_getenv) +def test_docker_params(mock_os_getenv, kwargs): + """Tests docker params constructor.""" + # Initialize DockerParams with the given kwargs + params = DockerParams(**kwargs) + + # Derive expected values based on kwargs or mock_env_vars + expected_image_name = kwargs.get("image_name", mock_env_vars["DOCKER_IMAGE"]) + expected_registry = kwargs.get("registry", mock_env_vars["DOCKER_REGISTRY"]) + expected_tag = kwargs.get("tag", mock_env_vars["DOCKER_TAG"]) + expected_username = kwargs.get("username", mock_env_vars["DOCKER_USERNAME"]) + expected_password = kwargs.get("password", mock_env_vars["DOCKER_PASSWORD"]) + + # Build the expected image string + if expected_registry: + expected_image = f"{expected_registry}/{expected_image_name}:{expected_tag}" + else: + expected_image = f"{expected_image_name}:{expected_tag}" + + # Assertions + assert params.image_name == expected_image_name + assert params.registry == expected_registry + assert params.tag == expected_tag + assert params.username == expected_username + assert params.password == expected_password + assert params.image == expected_image From 58d2b8a96b2935df979bd62640c2b5470dda5928 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Sat, 10 Aug 2024 12:06:44 +0200 Subject: [PATCH 7/7] more tests --- src/ez_azml/env.py | 4 ++++ src/ez_azml/ml_client.py | 12 +++++++++--- tests/cli/test_cli.py | 25 ++++++++++++++++++++++++ tests/ml_client/test_ez_ml_client.py | 20 +++++++++++++++++++ tests/params/test_docker_params.py | 15 +------------- tests/utils.py | 29 ++++++++++++++++++++++++++++ 6 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 tests/cli/test_cli.py create mode 100644 tests/utils.py diff --git a/src/ez_azml/env.py b/src/ez_azml/env.py index dc398f9..cc92aa5 100644 --- a/src/ez_azml/env.py +++ b/src/ez_azml/env.py @@ -9,6 +9,10 @@ DOCKER_TAG = "DOCKER_TAG" DOCKER_IMAGE = "DOCKER_IMAGE" +AZURE_SUBSCRIPTION_ID = "AZURE_SUBSCRIPTION_ID" +AZURE_RESOURCE_GROUP_NAME = "AZURE_RESOURCE_GROUP_NAME" +AZURE_WORKSPACE = "AZURE_WORKSPACE" + def os_field(env_var: str, default: Optional[str] = None) -> Optional[str]: """Dataclass field that retrieves an environment variable. diff --git a/src/ez_azml/ml_client.py b/src/ez_azml/ml_client.py index b6aeffb..271520e 100644 --- a/src/ez_azml/ml_client.py +++ b/src/ez_azml/ml_client.py @@ -6,6 +6,12 @@ from azure.core.credentials import TokenCredential from azure.identity import DefaultAzureCredential +from ez_azml.env import ( + AZURE_RESOURCE_GROUP_NAME, + AZURE_SUBSCRIPTION_ID, + AZURE_WORKSPACE, +) + class EzMLClient(MLClient): """A easy client class to interact with Azure ML services. @@ -30,11 +36,11 @@ def __init__( **kwargs: Any, ) -> None: credential = credential or DefaultAzureCredential() - subscription_id = subscription_id or os.getenv("AZURE_SUBSCRIPTION_ID") + subscription_id = subscription_id or os.getenv(AZURE_SUBSCRIPTION_ID) resource_group_name = resource_group_name or os.getenv( - "AZURE_RESOURCE_GROUP_NAME" + AZURE_RESOURCE_GROUP_NAME ) - workspace_name = workspace_name or os.getenv("AZURE_WORKSPACE") + workspace_name = workspace_name or os.getenv(AZURE_WORKSPACE) super().__init__( credential, subscription_id, resource_group_name, workspace_name, **kwargs ) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py new file mode 100644 index 0000000..0252929 --- /dev/null +++ b/tests/cli/test_cli.py @@ -0,0 +1,25 @@ +from unittest import mock + +import pytest +from ez_azml.cli import EzAzureMLCLI + + +@pytest.mark.parametrize("command", ["run", "register"]) +def test_cloud_run_hooks_call(command: str): + """Tests that cli is calling pre and post hooks.""" + mock_cloud_run = mock.MagicMock() + cli = EzAzureMLCLI(cloud_run=mock_cloud_run) + + getattr(cli, command)() + + expected = [f"on_{command}_start", command, f"on_{command}_end"] + for method in expected: + getattr(mock_cloud_run, method).assert_called_once() + + +def test_ml_client_set(): + """Tests that ml_client is injected.""" + mock_cloud_run = mock.MagicMock() + mock_cloud_run.ml_client = None + cli = EzAzureMLCLI(cloud_run=mock_cloud_run) + assert cli.ml_client == mock_cloud_run.ml_client diff --git a/tests/ml_client/test_ez_ml_client.py b/tests/ml_client/test_ez_ml_client.py index e69de29..36af8d5 100644 --- a/tests/ml_client/test_ez_ml_client.py +++ b/tests/ml_client/test_ez_ml_client.py @@ -0,0 +1,20 @@ +from unittest import mock + +from ez_azml import EzMLClient +from ez_azml.env import ( + AZURE_RESOURCE_GROUP_NAME, + AZURE_SUBSCRIPTION_ID, + AZURE_WORKSPACE, +) + +from tests.utils import mock_os_getenv + + +@mock.patch("os.getenv", side_effect=mock_os_getenv) +def test_ml_client_envs(mock_os_getenv): + """Tests that init properly retrieves env variables.""" + client = EzMLClient() + + assert client.subscription_id == mock_os_getenv(AZURE_SUBSCRIPTION_ID) + assert client.resource_group_name == mock_os_getenv(AZURE_RESOURCE_GROUP_NAME) + assert client.workspace_name == mock_os_getenv(AZURE_WORKSPACE) diff --git a/tests/params/test_docker_params.py b/tests/params/test_docker_params.py index 32a1e49..8958b29 100644 --- a/tests/params/test_docker_params.py +++ b/tests/params/test_docker_params.py @@ -3,20 +3,7 @@ import pytest from ez_azml.params import DockerParams # Replace with the actual import path -# Mock environment variables -mock_env_vars = { - "DOCKER_IMAGE": "mock_image", - "DOCKER_REGISTRY": "mock_registry", - "DOCKER_TAG": "mock_tag", - "DOCKER_USERNAME": "mock_username", - "DOCKER_PASSWORD": "mock_password", -} - - -# Mock env.os_field function to return environment variables -def mock_os_getenv(env_var, default=None): - """Mocks getenv.""" - return mock_env_vars.get(env_var, default) +from tests.utils import mock_env_vars, mock_os_getenv @pytest.mark.parametrize( diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..b2a11df --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,29 @@ +# Mock environment variables +from ez_azml import env + +MOCK_DOCKER_IMAGE = "MOCK_DOCKER_IMAGE" +MOCK_DOCKER_REGISTRY = "MOCK_DOCKER_REGISTRY" +MOCK_DOCKER_TAG = "MOCK_DOCKER_TAG" +MOCK_DOCKER_USERNAME = "MOCK_DOCKER_USERNAME" +MOCK_DOCKER_PASSWORD = "MOCK_DOCKER_PASSWORD" # noqa: S105 +MOCK_AZURE_SUBSCRIPTION_ID = "MOCK_AZURE_SUBSCRIPTION_ID" +MOCK_AZURE_RESOURCE_GROUP_NAME = "MOCK_AZURE_RESOURCE_GROUP_NAME" +MOCK_AZURE_WORKSPACE = "MOCK_AZURE_WORKSPACE" + +# Rebuild the dictionary using the new variables +mock_env_vars = { + env.DOCKER_IMAGE: MOCK_DOCKER_IMAGE, + env.DOCKER_REGISTRY: MOCK_DOCKER_REGISTRY, + env.DOCKER_TAG: MOCK_DOCKER_TAG, + env.DOCKER_USERNAME: MOCK_DOCKER_USERNAME, + env.DOCKER_PASSWORD: MOCK_DOCKER_PASSWORD, + env.AZURE_SUBSCRIPTION_ID: MOCK_AZURE_SUBSCRIPTION_ID, + env.AZURE_RESOURCE_GROUP_NAME: MOCK_AZURE_RESOURCE_GROUP_NAME, + env.AZURE_WORKSPACE: MOCK_AZURE_RESOURCE_GROUP_NAME, +} + + +# Mock env.os_field function to return environment variables +def mock_os_getenv(env_var, default=None): + """Mocks getenv.""" + return mock_env_vars.get(env_var, default)