Skip to content

Commit

Permalink
Merge pull request #5 from AlejandroBaron/develop
Browse files Browse the repository at this point in the history
v0.1.2
  • Loading branch information
AlejandroBaron authored Aug 10, 2024
2 parents f69571e + 681a924 commit c1ad778
Show file tree
Hide file tree
Showing 28 changed files with 469 additions and 204 deletions.
3 changes: 1 addition & 2 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@ AZURE_WORKSPACE=
# Docker parameters
DOCKER_REGISTRY=
DOCKER_TAG=
## Gitlab tokens
DOCKER_USERNAME=
DOCKER_PASSWORD=
DOCKER_PASSWORD=
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ jobs:
- uses: pdm-project/setup-pdm@v4.1

- name: Publish package distributions to PyPI
run:
run:
pdm publish
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@ It aims to:

Simply run
```bash
pip install ezazml
pip install ez-azml
```
# Quickstart
The main entrypoint is the CLI command. The following command will show you the help
```
ez-azml -h
```

You can run an example with
If you have cloned the repo, you can run an example with:
## Command
```
ez-azml --config configs/pytorch/main.yaml run
ez-azml --config configs/command/main.yaml run
```
## Pipeline
## PipelineRun
```
ez-azml --config configs/pipeline/main.yaml run
```
PYTHONPATH=$PYTHONPATH:. ez-azml --config configs/pipeline/main.yaml run
```
(`PYTHONPATH` is extended with the current directory so that the `examples` package can be imported. This is not needed if your pipeline is available as an importable function in an installed package.)
6 changes: 3 additions & 3 deletions configs/pytorch/main.yaml → configs/command/main.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cloud_run:
class_path: ez_azml.cloud_runs.DockerCommand
class_path: ez_azml.cloud_runs.DockerCommandRun
init_args:
docker: docker.yaml
docker: ../dockers/pytorch.yaml
code: examples
compute: ../clusters/cpu.yaml
inputs:
Expand All @@ -20,7 +20,7 @@ cloud_run:
mode: rw_mount
commands: [
echo 'This is an example',
python pytorch.py
python pytorch_script.py
]
flags: [
"--learning_rate 1e-6",
Expand Down
File renamed without changes.
7 changes: 1 addition & 6 deletions configs/environments/conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,5 @@ name: default_environment
channels:
- defaults
dependencies:
- python=3.8.12
- python=3.9.12
- pip=21.2.2
- pip:
- mldesigner==0.1.0b17
- azure-ai-ml==1.18.0
- azureml-mlflow==1.56.0
- ez-azml==0.1.0
55 changes: 51 additions & 4 deletions configs/pipeline/main.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,59 @@
cloud_run:
class_path: ez_azml.cloud_runs.Pipeline
class_path: ez_azml.cloud_runs.PipelineRun
init_args:
experiment_name: "pipelines example"
commands:
- function: examples.pipelines.test_fn
environment: ../environments/pipeline.yaml
torch:
class_path: ez_azml.cloud_runs.DockerCommandRun
init_args:
docker: ../dockers/pytorch.yaml
code: examples/pytorch_script.py
compute: ../clusters/cpu.yaml
inputs:
data_path:
class_path: azure.ai.ml.Input
init_args:
type: uri_folder
path: azureml://datastores/workspaceblobstore/paths/data
mode: ro_mount
outputs:
output_path:
class_path: azure.ai.ml.Output
init_args:
type: uri_folder
path: azureml://datastores/workspaceblobstore/paths/outputs
mode: rw_mount
commands: [
echo 'This is an example',
python pytorch_script.py
]
flags: [
"--learning_rate 1e-6",
"--output_path ${{outputs.output_path}}"
#"The input is ignored, could be used as ${{inputs.data_path}} through flags"
]
register_kwargs:
version: 2
command:
class_path: ez_azml.cloud_runs.CommandRun
init_args:
name: print_output
compute: ../clusters/cpu.yaml
code: examples/print_output.py
environment: ../environments/pipeline.yaml
inputs:
prev_output:
class_path: azure.ai.ml.Input
init_args:
type: uri_folder
commands: [
echo 'This is an print example',
"python print_output.py --path ${{inputs.prev_output}}"
]
register_kwargs:
version: 2

pipeline: examples.pipelines.test_pipeline
pipeline: examples.pipeline.ez_azml_pipeline
compute: ../clusters/cpu_raw.yaml
inputs:
test_input:
Expand Down
6 changes: 6 additions & 0 deletions examples/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def ez_azml_pipeline(test_input):
    """Example pipeline: run the PyTorch component, then print its output.

    ``pytorch_script`` and ``print_output`` are not defined in this module;
    they are the names of the registered components and are expected to be
    resolvable when the pipeline function is executed (presumably injected
    by the pipeline runner -- confirm against ``PipelineRun``).

    Args:
        test_input: pipeline input forwarded to the first component as
            ``data_path``.

    Returns:
        A dict mapping ``"test_output"`` to the first component's
        ``output_path`` output.
    """
    # The component names only exist at run time, hence the lint waivers.
    training_step = pytorch_script(data_path=test_input)  # type: ignore  # noqa: F821
    step_outputs = training_step.outputs
    print_output(prev_output=step_outputs.output_path)  # type: ignore  # noqa: F821
    return {"test_output": step_outputs.output_path}
16 changes: 0 additions & 16 deletions examples/pipelines.py

This file was deleted.

11 changes: 11 additions & 0 deletions examples/print_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Print the contents of every ``.txt`` file found directly in a directory."""
import argparse
from pathlib import Path
from typing import Optional, Sequence


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Print each ``*.txt`` file located directly under ``--path``.

    Args:
        argv: command-line arguments to parse; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what happens when the
            file is run as a script.
    """
    # Argument parsing
    parser = argparse.ArgumentParser(description="Print input")
    parser.add_argument("--path", type=Path, default="outputs", help="output to read")
    args = parser.parse_args(argv)
    path: Path = args.path
    # glob() already yields paths that include `path`, so they are read as-is:
    # joining them onto `path` again would duplicate a relative directory
    # (e.g. "outputs/outputs/a.txt"). Sorting makes the print order stable.
    for file in sorted(path.glob("*.txt")):
        print(file.read_text())


if __name__ == "__main__":
    main()
File renamed without changes.
7 changes: 7 additions & 0 deletions src/ez_azml/cli/ez_azml.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,10 @@ def run(self):
output = self.cloud_run.run()
logger.info(f"Run available at {output.url}")
self.cloud_run.on_run_end(output)

def register(self):
    """Register the configured cloud run as a reusable component.

    Calls the run's ``on_register_start`` hook, its ``register`` method and
    its ``on_register_end`` hook, in that order, then logs a confirmation.
    """
    cloud_run = self.cloud_run
    cloud_run.on_register_start()
    cloud_run.register()
    cloud_run.on_register_end()
    logger.info("Cloud run registered!")
6 changes: 3 additions & 3 deletions src/ez_azml/cloud_runs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .commands import Command, DockerCommand
from .pipelines import Pipeline
from .commands import CommandRun, DockerCommandRun
from .pipelines import PipelineRun

__all__ = ["Command", "DockerCommand", "Pipeline"]
__all__ = ["CommandRun", "DockerCommandRun", "PipelineRun"]
12 changes: 12 additions & 0 deletions src/ez_azml/cloud_runs/cloud_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,15 @@ def run(self) -> RunOutput:
def on_run_end(self, output: RunOutput):
"""Hook called once run has been submitted."""
return

def on_register_start(self):
    """Hook invoked before a component is registered.

    The base implementation does nothing and returns ``None``.
    """

@abstractmethod
def register(self):
    """Registers the run as a reusable component.

    Abstract: the base class provides no implementation, so concrete cloud
    runs must define how they are registered.
    """

def on_register_end(self):
    """Hook invoked after a component has been registered.

    The base implementation does nothing and returns ``None``.
    """
6 changes: 3 additions & 3 deletions src/ez_azml/cloud_runs/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .command import Command
from .docker_command import DockerCommand
from .command import CommandRun
from .docker_command import DockerCommandRun

__all__ = ["DockerCommand", "Command"]
__all__ = ["DockerCommandRun", "CommandRun"]
97 changes: 80 additions & 17 deletions src/ez_azml/cloud_runs/commands/command.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,117 @@
from typing import Optional
import io
from pathlib import Path
from typing import Any, Optional, Union

from azure.ai.ml import command
from azure.ai.ml.entities import (
UserIdentityConfiguration,
WorkspaceConnection,
)
import yaml
from azure.ai.ml import Input, MLClient, Output, command, load_component
from azure.ai.ml.entities import Command, UserIdentityConfiguration, WorkspaceConnection
from typing_extensions import override

from ez_azml.cloud_runs.cloud_run import CloudRun, RunOutput


class Command(CloudRun):
class CommandRun(CloudRun):
"""Cloud run that is based on AzureML Commands.
Args:
code: location of the scripts to use
code: location of the python scripts to use
commands: commands to run on the cloud (e.g. `python my_script.py`)
flags: flags to use with the last command.
identity: credentials to use.
name: command's name.
register_kwargs: kwargs to use when registering component.
"""

def __init__(
self,
code: str,
commands: list[str],
code: Union[str, Path],
commands: Union[str, list[str]],
ws_connection: Optional[WorkspaceConnection] = None,
flags: Optional[list[str]] = None,
identity: Optional[UserIdentityConfiguration] = None,
name: Optional[str] = None,
register_kwargs: Optional[dict[str, Any]] = None,
**kwargs,
) -> None:
super().__init__(**kwargs)
self.ws_connection = ws_connection
identity = identity or UserIdentityConfiguration()
self.identity = identity or UserIdentityConfiguration()
if isinstance(commands, str):
commands = [commands]
if flags:
commands[-1] += " " + " ".join(flags)
self.job = command(
command=";".join(commands),
code=code,
self.commands = commands
self.code = Path(code)
self.name = name or self.code.stem
self.register_kwargs = register_kwargs or {}

@property
def cli_command(self) -> str:
"""Actual cli command run on AzureML."""
return ";".join(self.commands)

@property
def command(self) -> Command:
"""Runnable command."""
return command(
command=self.cli_command,
code=self.code,
environment=self.environment,
compute=self.compute.name,
inputs=self.inputs,
outputs=self.outputs,
identity=identity,
identity=self.identity,
)

def _get_io_dict(
self, ios: dict[str, Union[Input, Output]], keys: Optional[list[str]] = None
):
ios_as_dict = {}
keys = keys or ["type"]
for key, io_obj in ios.items():
io_d = dict(io_obj)
ios_as_dict[key] = {k: io_d[k] for k in keys if io_d[k] is not None}
return ios_as_dict

def _get_component_yaml_stream(
    self, name: Optional[str] = None, environment: Optional[str] = None, **kwargs
) -> io.StringIO:
    """Build the component specification and return it as an in-memory YAML stream.

    Args:
        name: component name; ``self.name`` is used when omitted or empty.
        environment: environment reference. When omitted or empty,
            ``self.environment`` is created/updated through the ML client
            and referenced as ``azureml:<name>:<version>`` (version ``1``
            when the environment has none set).
        **kwargs: extra top-level entries of the specification; they
            override the generated entries with the same key.

    Returns:
        A ``StringIO`` holding the dumped YAML, positioned at its start.
    """
    component_inputs = self._get_io_dict(self.inputs)
    component_outputs = self._get_io_dict(self.outputs)
    if not environment:
        env = self.environment
        self.ml_client.environments.create_or_update(env)
        environment = f"azureml:{env.name}:{env.version or 1}"
    spec = {
        "name": name or self.name,
        "inputs": component_inputs,
        "outputs": component_outputs,
        "code": str(self.code),
        "command": self.cli_command,
        "environment": environment,
    }
    # Caller-supplied entries win over the generated ones.
    spec.update(kwargs)
    # Without a stream argument yaml.dump returns the document as a string;
    # a StringIO created from it starts at position 0, ready to be read.
    return io.StringIO(yaml.dump(spec))

def get_component(self, **kwargs):
    """Return the component loaded from this run's generated YAML specification.

    Args:
        **kwargs: entries forwarded to ``_get_component_yaml_stream``.
            ``self.register_kwargs`` supply the defaults and values passed
            here take precedence over them.

    Returns:
        The object returned by ``load_component`` for the generated YAML.
    """
    # Merge before the call: unpacking both mappings straight into the call
    # raises TypeError as soon as they share a key (e.g. ``version`` set in
    # the config and also passed explicitly).
    spec_kwargs = {**self.register_kwargs, **kwargs}
    yaml_file = self._get_component_yaml_stream(**spec_kwargs)
    return load_component(yaml_file)

@override
def register(self, ml_client: Optional[MLClient] = None, **kwargs):
    """Build the component and create/update it in the workspace.

    Args:
        ml_client: client used for the registration; ``self.ml_client`` is
            used when none is given.
        **kwargs: forwarded to ``get_component``.

    Returns:
        The locally built component (not the value returned by the
        ``create_or_update`` call).
    """
    client = ml_client if ml_client else self.ml_client
    built_component = self.get_component(**kwargs)
    client.components.create_or_update(built_component)
    return built_component

@override
def run(self) -> str:
if self.ws_connection:
self.ml_client.connections.create_or_update(self.ws_connection)
self.ml_client.environments.create_or_update(self.environment)
self.ml_client.begin_create_or_update(self.compute).result()
cloud_job = self.ml_client.create_or_update(self.job)
cloud_job = self.ml_client.create_or_update(self.command)
return RunOutput(url=cloud_job.studio_url)
4 changes: 2 additions & 2 deletions src/ez_azml/cloud_runs/commands/docker_command.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from ez_azml.entities import DockerEnvironment, DockerWorkspaceConnection
from ez_azml.params import DockerParams

from .command import Command
from .command import CommandRun


class DockerCommand(Command):
class DockerCommandRun(CommandRun):
"""A Command that uses a docker image.
Args:
Expand Down
4 changes: 2 additions & 2 deletions src/ez_azml/cloud_runs/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .pipeline import Pipeline, PipelineCommand
from .pipeline import PipelineRun

__all__ = ["Pipeline", "PipelineCommand"]
__all__ = ["PipelineRun"]
Loading

0 comments on commit c1ad778

Please sign in to comment.