diff --git a/infra/ansible/.ansible-lint b/infra/ansible/.ansible-lint new file mode 100644 index 00000000000..a8661e61201 --- /dev/null +++ b/infra/ansible/.ansible-lint @@ -0,0 +1,6 @@ +--- +# .ansible-lint + +profile: moderate +skip_list: + - schema[tasks] \ No newline at end of file diff --git a/infra/ansible/Dockerfile b/infra/ansible/Dockerfile new file mode 100644 index 00000000000..d828e0153d9 --- /dev/null +++ b/infra/ansible/Dockerfile @@ -0,0 +1,32 @@ +ARG python_version=3.8 +ARG debian_version=buster + +FROM python:${python_version}-${debian_version} AS build + +WORKDIR /ansible +RUN pip install ansible +COPY . /ansible + +ARG ansible_vars +RUN ansible-playbook -vvv playbook.yaml -e "stage=build" -e "${ansible_vars}" + +FROM python:${python_version}-${debian_version} AS release + +WORKDIR /ansible +RUN pip install ansible +COPY . /ansible + +ARG ansible_vars +RUN ansible-playbook -vvv playbook.yaml -e "stage=release" -e "${ansible_vars}" --tags "install_deps" + +WORKDIR /tmp/wheels +COPY --from=build /src/pytorch/dist/*.whl ./ +COPY --from=build /src/pytorch/xla/dist/*.whl ./ + +RUN echo "Installing the following wheels" && ls *.whl +RUN pip install *.whl + +WORKDIR / + +RUN rm -rf /ansible /tmp/wheels +COPY --from=build /dist/*.whl /dist/ diff --git a/infra/ansible/README.md b/infra/ansible/README.md new file mode 100644 index 00000000000..9094f645de3 --- /dev/null +++ b/infra/ansible/README.md @@ -0,0 +1,58 @@ +# Ansible playbook + +This ansible playbook will perform the following actions on the localhost: + * install required pip and apt packages, depending on the specified stage, + architecture and accelerator (see [apt.yaml](config/apt.yaml) and + [pip.yaml](config/pip.yaml)). + * fetch bazel (version configured in [vars.yaml](config/vars.yaml)), + * fetch PyTorch and XLA sources at master (or specific revisions, + see role `fetch_srcs` in [playbook.yaml](playbook.yaml)). + * set required environment variables (see [env.yaml](config/env.yaml)), + * build and install PyTorch and XLA wheels, + * apply infrastructure tests (see `*/tests.yaml` files in [roles](roles)). + +## Prerequisites + +* Python 3.8+ +* Ansible. Install with `pip install ansible`. + +## Running + +The playbook requires passing explicitly 3 variables that configure playbook +behavior (installed pip/apt packages and set environment variables): +* `stage`: build or release. Different packages are installed depending on + the chosen stage. +* `arch`: aarch64 or amd64. Architecture of the built image and wheels. +* `accelerator`: tpu or cuda. Available accelerator. + +The variables can be passed through `-e` flag: `-e "="`. + +Example: `ansible-playbook playbook.yaml -e "stage=build arch=amd64 accelerator=tpu"` + +## Config structure + +The playbook configuration is split into 4 files, per each logical system. +The configuration is simply loaded as playbook variables which are then passed +to specific roles and tasks. +Only variables in [config/env.yaml](config/env.yaml) are passed as env variables. + +* [apt.yaml](config/apt.yaml) - specifies apt packages for each stage and + architecture or accelerator. + Packages shared between all architectures and accelerators in a given stage + are specified in `*_common`. They are appended to any architecture specific list. + + This config also contains a list of required apt repos and signing keys. + These variables are mainly consumed by the [install_deps](roles/install_deps/tasks/main.yaml) role. + +* [pip.yaml](config/pip.yaml) - similarly to apt.yaml, lists pip packages per stage and arch / accelerator. + In both pip and apt config files stage and and arch / accelerator are + concatenated together and specified under one key (e.g. build_amd64, release_tpu). + +* [env.yaml](config/env.yaml) - contains Ansible variables that are passed as env variables when + building PyTorch and XLA (`build_env`). Variables in `release_env` are saved in `/etc/environment` (executed for the `release` stage). + +* [vars.yaml](config/vars.yaml) - Ansible variables used in other config files and throughout the playbook. + Not associated with any particular system. + +Variables from these config files are dynamically loaded (during playbook execution), +see [playbook.yaml](playbook.yaml). diff --git a/infra/ansible/ansible.cfg b/infra/ansible/ansible.cfg new file mode 100644 index 00000000000..490c16aea50 --- /dev/null +++ b/infra/ansible/ansible.cfg @@ -0,0 +1,16 @@ +# See https://docs.ansible.com/ansible/latest/reference_appendices/config.html +# for various configuration options. + +[defaults] +# Displays tasks execution duration. +callbacks_enabled = profile_tasks +# The playbooks is only run on the implicit localhost. +# Silence warning about empty hosts inventory. +localhost_warning = False +# Make output human-readable. +stdout_callback = yaml + +[inventory] +# Silence warning about no inventory. +# This option is available since Ansible 2.14 (available only with Python 3.9+). +inventory_unparsed_warning = False \ No newline at end of file diff --git a/infra/ansible/config/apt.yaml b/infra/ansible/config/apt.yaml new file mode 100644 index 00000000000..e02eb247862 --- /dev/null +++ b/infra/ansible/config/apt.yaml @@ -0,0 +1,60 @@ +# Contains lists of apt packages for each stage (build|release) and arch or accelerator. +apt: + pkgs: + build_common: + - ccache + - curl + - git + - gnupg + - libopenblas-dev + - ninja-build + - procps + - python3-pip + - rename + - vim + - wget + - clang-format-7 + - lcov + - less + + build_cuda: + - "cuda-libraries-{{ cuda_version | replace('.', '-') }}" + - "cuda-toolkit-{{ cuda_version | replace('.', '-') }}" + - "cuda-minimal-build-{{ cuda_version | replace('.', '-') }}" + - "{{ cuda_deps['libcudnn'][cuda_version] }}" + - "{{ cuda_deps['libcudnn-dev'][cuda_version] }}" + + build_amd64: + - "clang-{{ clang_version }}" + + build_aarch64: + - scons + - gcc-10 + - g++-10 + + release_common: + - curl + - git + - gnupg + - libgomp1 + - libopenblas-base + - patch + + release_cuda: + - "cuda-libraries-{{ cuda_version | replace('.', '-') }}" + - "cuda-minimal-build-{{ cuda_version | replace('.', '-') }}" + - "{{ cuda_deps['libcudnn'][cuda_version] }}" + + # Specify objects with string fields `url` and `keyring`. + # The keyring path should start with /usr/share/keyrings/ for debian and ubuntu. + signing_keys: + - url: https://apt.llvm.org/llvm-snapshot.gpg.key + keyring: /usr/share/keyrings/llvm.pgp + - url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/3bf863cc.pub" + keyring: /usr/share/keyrings/cuda.pgp + + repos: + # signed-by path should match the corresponding keyring path above. + - "deb [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ llvm_debian_repo }}/ llvm-toolchain-{{ llvm_debian_repo }}-{{ clang_version }} main" + - "deb-src [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ llvm_debian_repo }}/ llvm-toolchain-{{ llvm_debian_repo }}-{{ clang_version }} main" + - "deb [signed-by=/usr/share/keyrings/cuda.pgp] https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/ /" \ No newline at end of file diff --git a/infra/ansible/config/cuda_deps.yaml b/infra/ansible/config/cuda_deps.yaml new file mode 100644 index 00000000000..f0b73f1b84d --- /dev/null +++ b/infra/ansible/config/cuda_deps.yaml @@ -0,0 +1,12 @@ +# Versions of cuda dependencies for given cuda versions. +# Note: wrap version in quotes to ensure they're treated as strings. +cuda_deps: + # List all libcudnn8 versions with `apt list -a libcudnn8` + libcudnn: + "11.8": libcudnn8=8.8.0.121-1+cuda11.8 + "11.7": libcudnn8=8.5.0.96-1+cuda11.7 + "11.2": libcudnn8=8.1.1.33-1+cuda11.2 + libcudnn-dev: + "11.8": libcudnn8-dev=8.8.0.121-1+cuda11.8 + "11.7": libcudnn8-dev=8.5.0.96-1+cuda11.7 + "11.2": libcudnn8-dev=8.1.1.33-1+cuda11.2 diff --git a/infra/ansible/config/env.yaml b/infra/ansible/config/env.yaml new file mode 100644 index 00000000000..3a25c298ae3 --- /dev/null +++ b/infra/ansible/config/env.yaml @@ -0,0 +1,49 @@ +# Variables that will be stored in ~/.bashrc and ~/.zshrc files for the release stage. +# They'll be accessible for all processes on the host, also in the development image. +release_env: + common: + # Force GCC because clang/bazel has issues. + CC: gcc + CXX: g++ + # CC: "clang-{{ clang_version }}" + # CXX: "clang++-{{ clang_version }}" + LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + + tpu: + ACCELERATOR: tpu + TPUVM_MODE: 1 + + cuda: + TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0 + XLA_CUDA: 1 + +# Variables that will be passed to shell environment only for building PyTorch and XLA libs. +build_env: + common: + LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + # Set explicitly to 0 as setup.py defaults this flag to true if unset. + BUILD_CPP_TESTS: 0 + # Force GCC because clang/bazel has issues. + CC: gcc + CXX: g++ + PYTORCH_BUILD_NUMBER: 1 + TORCH_XLA_VERSION: "{{ package_version }}" + PYTORCH_BUILD_VERSION: "{{ package_version }}" + XLA_SANDBOX_BUILD: 1 + BAZEL_REMOTE_CACHE: 1 + SILO_NAME: "cache-silo-{{ arch }}-{{ accelerator }}" + + amd64: + ARCH: amd64 + + aarch64: + + cuda: + TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0 + XLA_CUDA: 1 + + tpu: + ACCELERATOR: tpu + TPUVM_MODE: 1 + BUNDLE_LIBTPU: 1 + diff --git a/infra/ansible/config/pip.yaml b/infra/ansible/config/pip.yaml new file mode 100644 index 00000000000..7082c33f6e8 --- /dev/null +++ b/infra/ansible/config/pip.yaml @@ -0,0 +1,53 @@ +# Contains lists of pip packages for each stage (build|release) and arch or accelerator. +pip: + pkgs: + # Shared between all architectures and accelerators for the build stage. + build_common: + - astunparse + - cffi + - cloud-tpu-client + - cmake + - coverage + - dataclasses + - expecttest==0.1.3 + - future + - git-archive-all + - google-api-python-client + - google-cloud-storage + - hypothesis + - lark-parser + - ninja + - numpy + - oauth2client + - pyyaml + - requests + - setuptools + - six + - tensorboard + - tensorboardX + - tqdm + - typing + - typing_extensions + - sympy + - yapf==0.30.0 + + build_amd64: + - mkl + - mkl-include + + build_aarch64: + + # Shared between all architectures and accelerators for the release stage. + release_common: + - numpy + - pyyaml + - mkl + - mkl-include + + release_tpu: + + # Packages that will be installed with the `--nodeps` flag. + pkgs_nodeps: + release_common: + - torchvision + - pillow diff --git a/infra/ansible/config/vars.yaml b/infra/ansible/config/vars.yaml new file mode 100644 index 00000000000..981f6d8a1b9 --- /dev/null +++ b/infra/ansible/config/vars.yaml @@ -0,0 +1,10 @@ +# Used for fetching cuda from the right repo, see apt.yaml. +cuda_repo: ubuntu1804 +cuda_version: "11.8" +# Used for fetching clang from the right repo, see apt.yaml. +llvm_debian_repo: buster +clang_version: 10 +# PyTorch and PyTorch/XLA wheel versions. +package_version: 2.0 +# If set to true, wheels will be renamed to $WHEEL_NAME-nightly-cp38-cp38-linux_x86_64.whl. +nightly_release: false \ No newline at end of file diff --git a/infra/ansible/development.Dockerfile b/infra/ansible/development.Dockerfile new file mode 100644 index 00000000000..2132c23821d --- /dev/null +++ b/infra/ansible/development.Dockerfile @@ -0,0 +1,19 @@ +# Dockerfile for building a development image. +# The built image contains all required pip and apt packages for building and +# running PyTorch and PyTorch/XLA. The image doesn't contain any source code. +ARG python_version=3.8 +ARG debian_version=buster + +FROM python:${python_version}-${debian_version} + +RUN pip install ansible + +COPY . /ansible +WORKDIR /ansible + +# List Asnible tasks to apply for the dev image. +ENV TAGS="bazel,configure_env,install_deps" + +ARG ansible_vars +RUN ansible-playbook playbook.yaml -e "stage=build" -e "${ansible_vars}" --tags "${TAGS}" +RUN ansible-playbook playbook.yaml -e "stage=release" -e "${ansible_vars}" --tags "${TAGS}" diff --git a/infra/ansible/e2e_tests.Dockerfile b/infra/ansible/e2e_tests.Dockerfile new file mode 100644 index 00000000000..4a48d37421f --- /dev/null +++ b/infra/ansible/e2e_tests.Dockerfile @@ -0,0 +1,38 @@ +ARG python_version=3.8 +ARG debian_version=buster + +FROM python:${python_version}-${debian_version} AS build + +WORKDIR /ansible +RUN pip install ansible +COPY . /ansible + +# Build PyTorch and PyTorch/XLA wheels. +ARG ansible_vars +RUN ansible-playbook -vvv playbook.yaml -e "stage=build" -e "${ansible_vars}" + +FROM python:${python_version}-${debian_version} +WORKDIR /ansible +RUN pip install ansible +COPY . /ansible + +# Install runtime pip and apt dependencies. +ARG ansible_vars +RUN ansible-playbook -vvv playbook.yaml -e "stage=release" -e "${ansible_vars}" --tags "install_deps" + +# Copy test sources. +RUN mkdir -p /src/pytorch/xla +COPY --from=build /src/pytorch/xla/test /src/pytorch/xla/test + +# Copy and install wheels. +WORKDIR /tmp/wheels +COPY --from=build /src/pytorch/dist/*.whl ./ +COPY --from=build /src/pytorch/xla/dist/*.whl ./ + +RUN echo "Installing the following wheels" && ls *.whl +RUN pip install *.whl + +WORKDIR / + +# Clean-up unused directories. +RUN rm -rf /ansible /tmp/wheels \ No newline at end of file diff --git a/infra/ansible/playbook.yaml b/infra/ansible/playbook.yaml new file mode 100644 index 00000000000..3484fdc72ce --- /dev/null +++ b/infra/ansible/playbook.yaml @@ -0,0 +1,94 @@ +- name: "Install build dependencies" + hosts: localhost + connection: local + + # The playbook requires passing 3 variables explicitly: + # - stage: build or release. Different packages are installed depending on + # the chosen stage. + # - arch: aarch64 or amd64. Architecture of the built image and wheels. + # - accelerator: tpu or cuda. Available accelerator. + pre_tasks: + - name: "Validate required variables" + ansible.builtin.assert: + that: "{{ lookup('ansible.builtin.vars', item.name) is regex(item.pattern) }}" + fail_msg: | + "Variable '{{ item.name }} = '{{ lookup('ansible.builtin.vars', item.name) }}' doesn't match pattern '{{ item.pattern }}'" + "Pass the required variable with: --e \"{{ item.name }}=\"" + loop: + - name: stage + pattern: ^(build|release)$ + - name: arch + pattern: ^(aarch64|amd64)$ + - name: accelerator + pattern: ^(tpu|cuda)$ + + - name: "Include vars from config files" + ansible.builtin.include_vars: + file: "config/{{ item }}" + loop: + # vars.yaml should be the first as other config files depend on it. + - vars.yaml + # cuda_deps should be loaded before apt, since apt depends on it. + - cuda_deps.yaml + - apt.yaml + - pip.yaml + - env.yaml + tags: always # Execute this task even with `--skip-tags` or `--tags` is used. + + roles: + - role: bazel + tags: bazel + + - role: install_deps + vars: + apt_keys: "{{ apt.signing_keys }}" + + # If a variable (like `apt.pkgs.common`) is defined, but not set to + # anything it cannot be concatenated with a list. + # Use `v | default([], true)` to set `v` to an empty array if it evaluates to false. + # See https://jinja.palletsprojects.com/en/3.0.x/templates/#jinja-filters.default. + apt_pkgs: "{{ + apt.pkgs[stage + '_common'] | default([], true) + + apt.pkgs[stage + '_' + arch] | default([], true) + + apt.pkgs[stage + '_' + accelerator] | default([], true) + }}" + + apt_repos: "{{ apt.repos }}" + + pip_pkgs: "{{ + pip.pkgs[stage + '_common'] | default([], true) + + pip.pkgs[stage + '_' + arch] | default([], true) + + pip.pkgs[stage + '_' + accelerator] | default([], true) + }}" + + pip_pkgs_nodeps: "{{ + pip.pkgs_nodeps[stage + '_common'] | default([], true) + + pip.pkgs_nodeps[stage + '_' + arch] | default([], true) + + pip.pkgs_nodeps[stage + '_' + accelerator] | default([], true) + }}" + tags: install_deps + + - role: fetch_srcs + vars: + src_root: "/src" + tags: fetch_srcs + + - role: build_srcs + vars: + src_root: "/src" + env_vars: "{{ + build_env.common | default({}, true) | + combine(build_env[arch] | default({}, true)) | + combine(build_env[accelerator] | default({}, true)) + }}" + tags: build_srcs + + - role: configure_env + vars: + env_vars: "{{ + release_env.common | default({}, true) | + combine(release_env[arch] | default({}, true)) | + combine(release_env[accelerator] | default({}, true)) + }}" + when: stage == "release" + tags: configure_env diff --git a/infra/ansible/roles/bazel/defaults/main.yaml b/infra/ansible/roles/bazel/defaults/main.yaml new file mode 100644 index 00000000000..6ddadb8b686 --- /dev/null +++ b/infra/ansible/roles/bazel/defaults/main.yaml @@ -0,0 +1 @@ +bazelisk_version: 1.15.0 diff --git a/infra/ansible/roles/bazel/tasks/main.yaml b/infra/ansible/roles/bazel/tasks/main.yaml new file mode 100644 index 00000000000..038a5a1cefa --- /dev/null +++ b/infra/ansible/roles/bazel/tasks/main.yaml @@ -0,0 +1,10 @@ +- name: "Download bazelisk v{{ bazelisk_version }}" + ansible.builtin.get_url: + url: "https://github.com/bazelbuild/bazelisk/releases/download/v{{ bazelisk_version }}/bazelisk-linux-amd64" + dest: /usr/local/bin/bazel + mode: 'u=rxw,g=rw,o=r' + +- name: "Tests" + include_tasks: tests.yaml + tags: + - tests diff --git a/infra/ansible/roles/bazel/tasks/tests.yaml b/infra/ansible/roles/bazel/tasks/tests.yaml new file mode 100644 index 00000000000..4cb19f77210 --- /dev/null +++ b/infra/ansible/roles/bazel/tasks/tests.yaml @@ -0,0 +1,3 @@ +- name: "Bazel --version runs succesfully" + ansible.builtin.command: + cmd: bazel --version diff --git a/infra/ansible/roles/build_srcs/tasks/main.yaml b/infra/ansible/roles/build_srcs/tasks/main.yaml new file mode 100644 index 00000000000..09e221705ba --- /dev/null +++ b/infra/ansible/roles/build_srcs/tasks/main.yaml @@ -0,0 +1,95 @@ +- name: Build PyTorch + ansible.builtin.command: + cmd: python setup.py bdist_wheel + chdir: "{{ (src_root, 'pytorch') | path_join }}" + creates: "{{ (src_root, 'pytorch/dist/*.whl') | path_join }}" + # Set `USE_CUDA=0` as PyTorch cannot be used with GPU in eager and XLA mode. + environment: "{{ env_vars | combine({'USE_CUDA': 0}) }}" + +- name: Find PyTorch *.whl files in pytorch/dist + ansible.builtin.find: + path: "{{ (src_root, 'pytorch/dist') | path_join }}" + pattern: "*.whl" + register: pytorch_wheels + +- name: Install PyTorch wheels + ansible.builtin.pip: + name: "{{ pytorch_wheels.files | map(attribute='path') }}" + state: "forcereinstall" + +- name: Check if build_torch_xla_libs.sh script exists + stat: + path: "{{ (src_root, 'pytorch/xla/build_torch_xla_libs.sh') | path_join }}" + register: build_torch_xla_libs_result + +- name: Build XLA computation client library + ansible.builtin.command: + cmd: bash build_torch_xla_libs.sh -O -D_GLIBCXX_USE_CXX11_ABI=1 + chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" + environment: "{{ env_vars }}" + when: build_torch_xla_libs_result.stat.exists + +- name: Build PyTorch/XLA + ansible.builtin.command: + cmd: python setup.py bdist_wheel + chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" + environment: "{{ env_vars }}" + +- name: Find XLA *.whl files in pytorch/xla/dist + ansible.builtin.find: + path: "{{ (src_root, 'pytorch/xla/dist') | path_join }}" + pattern: "*.whl" + register: xla_wheels + +- name: Install XLA wheels + ansible.builtin.pip: + name: "{{ xla_wheels.files | map(attribute='path') }}" + state: "forcereinstall" + +- name: Create a temp directory for collecting wheels + ansible.builtin.file: + path: /tmp/staging-wheels + state: directory + mode: '0755' + +- name: Create /dist directory for exported wheels + ansible.builtin.file: + path: /dist + state: directory + mode: '0755' + +- name: Rename and append +YYYYMMDD suffix to nightly wheels + ansible.builtin.shell: | + pushd /tmp/staging-wheels + cp {{ item.dir }}/*.whl . + rename -v "s/^{{ item.prefix }}-(.*?)-cp/{{ item.prefix }}-nightly-cp/" *.whl + mv /tmp/staging-wheels/* /dist/ + popd + + rename -v "s/^{{ item.prefix }}-(.*?)-cp/{{ item.prefix }}-nightly+$(date -u +%Y%m%d)-cp/" *.whl + args: + executable: /bin/bash + chdir: "{{ item.dir }}" + loop: + - { dir: "{{ (src_root, 'pytorch/dist') | path_join }}", prefix: "torch" } + - { dir: "{{ (src_root, 'pytorch/xla/dist') | path_join }}", prefix: "torch_xla" } + when: nightly_release + +- name: Copy wheels to /dist + ansible.builtin.shell: "cp {{ item }}/*.whl /dist" + args: + executable: /bin/bash + loop: + - "{{ (src_root, 'pytorch/dist') | path_join }}" + - "{{ (src_root, 'pytorch/xla/dist') | path_join }}" + +- name: Delete temp directory + ansible.builtin.file: + path: /tmp/staging-wheels + state: absent + mode: '0755' + +- name: "Tests" + include_tasks: tests.yaml + tags: + - tests diff --git a/infra/ansible/roles/build_srcs/tasks/tests.yaml b/infra/ansible/roles/build_srcs/tasks/tests.yaml new file mode 100644 index 00000000000..9e925700dde --- /dev/null +++ b/infra/ansible/roles/build_srcs/tasks/tests.yaml @@ -0,0 +1,9 @@ +- name: "Check that various import statements work" + ansible.builtin.command: + cmd: "{{ item }}" + environment: "{{ env_vars | combine({'USE_CUDA': 0}) }}" + loop: + - python -c "import torchgen" + - python -c "import torch" + - python -c "import torch_xla" + - python -c "import torch_xla.core.xla_model" diff --git a/infra/ansible/roles/configure_env/tasks/main.yaml b/infra/ansible/roles/configure_env/tasks/main.yaml new file mode 100644 index 00000000000..807912019d5 --- /dev/null +++ b/infra/ansible/roles/configure_env/tasks/main.yaml @@ -0,0 +1,13 @@ +- name: Append environment variables required during runtime to ~/.bashrc + ansible.builtin.lineinfile: + path: ~/.bashrc + line: "export {{ item }}={{ env_vars[item] }}" + create: true + loop: "{{ env_vars.keys() | list }}" + +- name: Append environment variables required during runtime to ~/.zshrc + ansible.builtin.lineinfile: + path: ~/.zshrc + line: "export {{ item }}={{ env_vars[item] }}" + create: true + loop: "{{ env_vars.keys() | list }}" diff --git a/infra/ansible/roles/fetch_srcs/defaults/main.yaml b/infra/ansible/roles/fetch_srcs/defaults/main.yaml new file mode 100644 index 00000000000..54b409da6a2 --- /dev/null +++ b/infra/ansible/roles/fetch_srcs/defaults/main.yaml @@ -0,0 +1,3 @@ +# See https://docs.ansible.com/ansible/latest/collections/ansible/builtin/git_module.html#parameter-version +pytorch_git_rev: HEAD +xla_git_rev: HEAD diff --git a/infra/ansible/roles/fetch_srcs/tasks/main.yaml b/infra/ansible/roles/fetch_srcs/tasks/main.yaml new file mode 100644 index 00000000000..75f6b123390 --- /dev/null +++ b/infra/ansible/roles/fetch_srcs/tasks/main.yaml @@ -0,0 +1,45 @@ +- name: "Create source root directory at {{ src_root }}" + ansible.builtin.file: + path: "{{ src_root }}" + state: directory + mode: '0755' + +- name: "Clone git PyTorch and XLA git repos" + ansible.builtin.git: + repo: "{{ item.repo }}" + dest: "{{ item.dest }}" + version: "{{ item.version }}" + depth: 1 + force: true + loop: + - repo: https://github.com/pytorch/pytorch + dest: "{{ (src_root, 'pytorch') | path_join }}" + version: "{{ pytorch_git_rev }}" + + - repo: https://github.com/pytorch/xla + dest: "{{ (src_root, 'pytorch/xla') | path_join }}" + version: "{{ xla_git_rev }}" + +- name: Find *.diff files in pytorch/xla/tf_patches + ansible.builtin.find: + path: "{{ (src_root, 'pytorch/xla/tf_patches') | path_join }}" + pattern: "*.diff" + register: tf_patches + +- name: Apply patches to Tensorflow + ansible.posix.patch: + src: "{{ item }}" + # Use source file on the target machine instead of the one where + # the playbook is located. Has no effect when the target machine is + # localhost. + remote_src: true + strip: 1 + ignore_whitespace: true + basedir: "{{ (src_root, 'pytorch/xla/third_party/tensorflow') | path_join }}" + loop: "{{ tf_patches.files | map(attribute='path') }}" + ignore_errors: true + +- name: "Tests" + include_tasks: tests.yaml + tags: + - tests diff --git a/infra/ansible/roles/fetch_srcs/tasks/tests.yaml b/infra/ansible/roles/fetch_srcs/tasks/tests.yaml new file mode 100644 index 00000000000..f3c341197dc --- /dev/null +++ b/infra/ansible/roles/fetch_srcs/tasks/tests.yaml @@ -0,0 +1,13 @@ +- name: Retrieve status of setup.py files in XLA and PyTorch repos + ansible.builtin.stat: + path: "{{ item }}" + register: _res + loop: + - "{{ (src_root, 'pytorch/setup.py') | path_join }}" + - "{{ (src_root, 'pytorch/xla/setup.py') | path_join }}" + +- name: Assert that setup.py files exist + ansible.builtin.assert: + that: "{{ item.stat.exists }}" + fail_msg: "{{ item.item }} doesn't exist" + loop: "{{ _res.results }}" diff --git a/infra/ansible/roles/install_deps/tasks/main.yaml b/infra/ansible/roles/install_deps/tasks/main.yaml new file mode 100644 index 00000000000..5782b3544c2 --- /dev/null +++ b/infra/ansible/roles/install_deps/tasks/main.yaml @@ -0,0 +1,35 @@ +- name: Add apt keys + # Don't use apt-key for adding repo keys since it's deprecated. + # Instead place gpg and pgp files in /usr/share/keyrings/ (debian, ubuntu). + ansible.builtin.get_url: + url: "{{ item.url }}" + dest: "{{ item.keyring }}" + mode: 'u=rw,g=r,o=r' + loop: "{{ apt_keys }}" + register: add_apt_key + +- name: Add apt repositories into sources list + ansible.builtin.apt_repository: + repo: "{{ item }}" + state: present + loop: "{{ apt_repos }}" + register: add_apt_repo + +- name: Update apt cache + apt: + update_cache: true + when: add_apt_key.changed or add_apt_repo.changed + +- name: Install apt packages + ansible.builtin.apt: + name: "{{ apt_pkgs }}" + update_cache: true + +- name: Install pip packages + ansible.builtin.pip: + name: "{{ pip_pkgs }}" + +- name: Install pip packages without deps (--no-deps) + ansible.builtin.pip: + name: "{{ pip_pkgs_nodeps }}" + extra_args: "--no-deps"