From febd2799b4e810b9a467010d8341b8bb8f9e3088 Mon Sep 17 00:00:00 2001 From: appukuttan-shailesh Date: Mon, 12 Aug 2024 01:01:55 +0200 Subject: [PATCH] Upload working prototype --- .cookiecutterrc | 41 +++++ .dockerignore | 211 +++++++++++++++++++++++ .github/dockerhub_login.bash | 32 ++++ .github/show_system_versions.bash | 31 ++++ .github/workflows/build-test-publish.yml | 53 ++++++ .gitignore | 191 ++++++++++++++++++++ .osparc/Makefile | 68 ++++++++ .osparc/bin/activate | 67 +++++++ .osparc/bin/ooil | 40 +++++ .osparc/bin/yq | 17 ++ .osparc/docker-compose.overwrite.yml | 6 + .osparc/metadata.yml | 37 ++++ .osparc/runtime.yml | 9 + Makefile | 181 +++++++++++++++++++ VERSION | 1 + docker-compose.yml | 31 ++++ docker/python/Dockerfile | 113 ++++++++++++ docker/python/entrypoint.sh | 73 ++++++++ service.cli/execute.sh | 58 +++++++ service.cli/run | 19 ++ src/file_format_converter/.gitkeep | 1 + src/file_format_converter/main.py | 135 +++++++++++++++ validation/input/data4.csv | 11 ++ validation/input/inputs.json | 4 + 24 files changed, 1430 insertions(+) create mode 100644 .cookiecutterrc create mode 100644 .dockerignore create mode 100755 .github/dockerhub_login.bash create mode 100755 .github/show_system_versions.bash create mode 100644 .github/workflows/build-test-publish.yml create mode 100644 .gitignore create mode 100644 .osparc/Makefile create mode 100755 .osparc/bin/activate create mode 100755 .osparc/bin/ooil create mode 100755 .osparc/bin/yq create mode 100644 .osparc/docker-compose.overwrite.yml create mode 100644 .osparc/metadata.yml create mode 100644 .osparc/runtime.yml create mode 100644 Makefile create mode 100644 VERSION create mode 100644 docker-compose.yml create mode 100644 docker/python/Dockerfile create mode 100755 docker/python/entrypoint.sh create mode 100755 service.cli/execute.sh create mode 100755 service.cli/run create mode 100644 src/file_format_converter/.gitkeep create mode 100644 src/file_format_converter/main.py create mode 100644 
validation/input/data4.csv create mode 100644 validation/input/inputs.json diff --git a/.cookiecutterrc b/.cookiecutterrc new file mode 100644 index 0000000..c254b9d --- /dev/null +++ b/.cookiecutterrc @@ -0,0 +1,41 @@ +# This file exists so you can easily regenerate your project. +# +# `cookiepatcher` is a convenient shim around `cookiecutter` +# for regenerating projects (it will generate a .cookiecutterrc +# automatically for any template). To use it: +# +# pip install cookiepatcher +# cookiepatcher gh:itisfoundation/cookiecutter-osparc-service project-path +# +# See: +# https://pypi.python.org/pypi/cookiepatcher +# +# Alternatively, you can run: +# +# cookiecutter --overwrite-if-exists --config-file=project-path/.cookiecutterrc gh:itisfoundation/cookiecutter-osparc-service +# + +default_context: + + _checkout: None + _output_dir: '/home/shailesh' + _repo_dir: '/home/shailesh/.cookiecutters/cookiecutter-osparc-service' + _template: 'gh:ITISFoundation/cookiecutter-osparc-service' + author_affiliation: 'INT, Aix-Marseille University, France' + author_email: 'appukuttan.shailesh@gmail.com' + author_name: 'Shailesh Appukuttan' + contact_email: 'appukuttan.shailesh@gmail.com' + default_docker_registry: 'itisfoundation' + docker_base: 'python:3.11' + git_repo: 'github' + git_username: 'appukuttan-shailesh' + number_of_inputs: '2' + number_of_outputs: '1' + project_name: 'File Format Converter' + project_package_name: 'file_format_converter' + project_short_description: 'Module to allow conversion of files between various formats such as CSV, JSON, YAML, XLS, XLSX, NWB' + project_slug: 'file-format-converter' + project_type: 'computational' + release_date: '2024' + version: '0.1.0' + version_display: '0.1.0' diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..2dee4f0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,211 @@ +## Common.dockerignore + +* +!src/ +!service.cli/ +!docker/ +!.osparc/ + +# Common +README.md +CHANGELOG.md 
+docker-compose.yml +Dockerfile + +# git +.git +.gitattributes +.gitignore +.git* + +## Common.gitignore + +# output folders +build/ +output/ +out/ + +# temporary folders +tmp/ + +# explicit mark +*ignore* +.tmp* + +# vscode configuration +.vscode + +# make outputs +pytest_*.xml +.compose* + +# validation folder +!validation/**/* +# docker ignore +!.dockerignore +# git ignore +!.gitignore + +## Python.gitignore +# FROM https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git a/.github/dockerhub_login.bash b/.github/dockerhub_login.bash new file mode 100755 index 0000000..29972d7 --- /dev/null +++ b/.github/dockerhub_login.bash @@ -0,0 +1,32 @@ +#!/bin/bash +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +set -euo pipefail +IFS=$'\n\t' + +# check needed variables are defined +if [ ! -v DOCKER_USERNAME ] ||\ + [ ! -v DOCKER_PASSWORD ] ||\ + [ ! -v DOCKER_REGISTRY ]; then + echo "## ERROR: Please define the environs (DOCKER_USERNAME, DOCKER_PASSWORD, DOCKER_REGISTRY) in your CI settings!" + exit 1 +fi + +# check script needed variables +if [ ! -v OWNER ]; then + echo "## ERROR: incorrect usage of CI. OWNER (e.g. dockerhub organization like itisfoundation or user private name) not defined!" + exit 1 +fi + +# only upstream is allowed to push to itisfoundation repo +if [ "${OWNER,,}" != "itisfoundation" ] &&\ + { [ ! -v DOCKER_REGISTRY ] || [ -z "${DOCKER_REGISTRY}" ] || [ "$DOCKER_REGISTRY" = "itisfoundation" ]; }; then + echo "## ERROR: it is not allowed to push to the main dockerhub repository from a fork!" 
+ echo "## Please adapt your CI-defined environs (DOCKER_USERNAME, DOCKER_PASSWORD, DOCKER_REGISTRY)" + exit 1 +fi + +# these variable must be available securely from the CI +echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin + +echo "logged into dockerhub successfully, ready to push" +exit 0 diff --git a/.github/show_system_versions.bash b/.github/show_system_versions.bash new file mode 100755 index 0000000..c2a3b86 --- /dev/null +++ b/.github/show_system_versions.bash @@ -0,0 +1,31 @@ +#!/bin/bash +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +set -euo pipefail +IFS=$'\n\t' + +echo "------------------------------ environs -----------------------------------" +env + +echo "------------------------------ uname -----------------------------------" +uname -a +lsb_release -a + +echo "------------------------------ python -----------------------------------" +if command -v python; then + python --version +fi + +echo "------------------------------ python3 -----------------------------------" +if command -v python3; then + python3 --version +fi + +echo "------------------------------ docker -----------------------------------" +if command -v docker; then + docker version +fi + +echo "------------------------------ docker-compose -----------------------------------" +if command -v docker-compose; then + docker-compose version +fi diff --git a/.github/workflows/build-test-publish.yml b/.github/workflows/build-test-publish.yml new file mode 100644 index 0000000..ab26533 --- /dev/null +++ b/.github/workflows/build-test-publish.yml @@ -0,0 +1,53 @@ +name: Github-CI Push/PR file-format-converter + +on: + push: + pull_request: + +env: + # secrets can be set in settings/secrets on github + DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + +jobs: + build: + name: building file-format-converter + runs-on: ${{ matrix.os }} + strategy: + 
matrix: + python: [3.9] + os: [ubuntu-22.04] + fail-fast: false + steps: + - uses: actions/checkout@v3 + - name: setup python environment + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + - name: show versions + run: ./.github/show_system_versions.bash + - name: set owner variable + run: echo "OWNER=${GITHUB_REPOSITORY%/*}" >> $GITHUB_ENV + - name: set docker image tag + if: github.ref != 'refs/heads/master' + run: echo "DOCKER_IMAGE_TAG=${GITHUB_REF##*/}" >> $GITHUB_ENV + - name: get current image if available + run: make pull-latest || true + - name: build + run: | + make VERSION + make build + make info-build + - name: test + run: make tests + - if: github.event_name == 'push' && github.ref == 'refs/heads/master' + name: push + run: | + ./.github/dockerhub_login.bash + make push + - if: github.event_name == 'push' && github.ref != 'refs/heads/master' + name: push + run: | + ./.github/dockerhub_login.bash + make push-version diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..154e9ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,191 @@ +## Common.gitignore + +# output folders +build/ +output/ +out/ + +# temporary folders +tmp/ + +# explicit mark +*ignore* +.tmp* + +# vscode configuration +.vscode + +# make outputs +pytest_*.xml +.compose* + +# validation folder +!validation/**/* +# docker ignore +!.dockerignore +# git ignore +!.gitignore + +## Python.gitignore +# FROM https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/.osparc/Makefile b/.osparc/Makefile new file mode 100644 index 0000000..57dbc75 --- /dev/null +++ b/.osparc/Makefile @@ -0,0 +1,68 @@ + +.DEFAULT_GOAL := help +SHELL := /bin/bash + + +REPO_DIR := $(abspath $(CURDIR)/..) + +# NOTE that IMAGES variable can change when docker-compose.yml gets rebuilt. Do NOT use := !!! 
+IMAGES = $(shell ./bin/yq eval '.services.*.image' docker-compose.yml) + +export DOCKER_REGISTRY ?= registry:5000 + + +.PHONY: info +info: ## lists environments and required tools + # tools + @echo 'docker : $(shell docker --version 2>/dev/null || echo ERROR required tool missing )' + @echo 'docker compose : $(shell docker compose version 2>/dev/null || echo ERROR required tool missing )' + @echo 'docker buildx : $(shell docker buildx version 2>/dev/null || echo WARNING optional tool missing )' + @echo 'make : $(shell make --version 2>&1 | head -n 1)' + @echo 'awk : $(shell awk -W version 2>&1 | head -n 1)' + @echo 'sed : $(shell sed --version 2>&1 | head -n 1)' + # environments + @echo "REPO_DIR = ${REPO_DIR}" + @echo "IMAGES = ${IMAGES}" + @echo "DOCKER_REGISTRY = ${DOCKER_REGISTRY}" + + +.PHONY: update-version +update-version: + cd $(REPO_DIR) \ + && . update_version.sh \ + && cd - + +.PHONY: compose +compose: update-version ## creates docker-compose.yml + # creating compose specs + cd $(REPO_DIR) \ + && .osparc/bin/ooil compose -f .osparc/docker-compose.yml + + +.PHONY: build build-nc +build build-nc: compose ## builds image. 
Suffix -nc disables cache + # building + docker compose build $(if $(findstring -nc,$@),--no-cache,) + + +.PHONY: push +push: ## retags and pushes to ${DOCKER_REGISTRY} + # tag & push + @for image in ${IMAGES}; do \ + echo "Tagging and pushing ${DOCKER_REGISTRY}/$$image ..."; \ + docker tag "$$image" "${DOCKER_REGISTRY}/$$image"; \ + docker push "${DOCKER_REGISTRY}/$$image"; \ + done + # registry view + @curl --silent ${DOCKER_REGISTRY}/v2/_catalog | jq + + + +.PHONY: all +all: compose build-nc push ## all workflow + @echo "'${IMAGES}' built, and pushed to '${DOCKER_REGISTRY}'" + + +.PHONY: help +help: ## help on rule's targets + @awk --posix 'BEGIN {FS = ":.*?## "} /^[[:alpha:][:space:]_-]+:.*?## / {printf "%-20s %s\n", $$1, $$2}' $(MAKEFILE_LIST) diff --git a/.osparc/bin/activate b/.osparc/bin/activate new file mode 100755 index 0000000..1bcfd52 --- /dev/null +++ b/.osparc/bin/activate @@ -0,0 +1,67 @@ +#!/bin/bash + +# Store the current PATH +export _OLD_OSPARC_PATH=$PATH + + +# Get the directory of the currently running script +BIN_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +OSPARC_ENV=$(dirname "${BIN_DIR}") + + +# Add .osparc/bin to the PATH +export PATH="$BIN_DIR/.osparc/bin:$PATH" + + +deactivate () { + # Unset the OLD_PATH variable + # ! [ -z ${VAR+_} ] returns true if VAR is declared at all + if ! [ -z "${_OLD_OSPARC_PATH:+_}" ] ; then + PATH="$_OLD_OSPARC_PATH" + export PATH + unset _OLD_OSPARC_PATH + fi + + # The hash command must be called to get it to forget past + # commands. Without forgetting past commands the $PATH changes + # we made may not be respected + hash -r 2>/dev/null + + + # Removes (osparc) in prompt + if ! [ -z "${_OLD_OSPARC_PS1+_}" ] ; then + PS1="$_OLD_OSPARC_PS1" + export PS1 + unset _OLD_OSPARC_PS1 + fi + + unset OSPARC_ENV + unset OSPARC_ENV_PROMPT + if [ ! "${1-}" = "nondestructive" ] ; then + # Self destruct! 
+ unset -f deactivate + echo "osparc environment deactivated" + fi +} + +# unset irrelevant variables +deactivate nondestructive + + +# Adds (osparc) in prompt +if [ "xosparc" != x ] ; then + OSPARC_ENV_PROMPT=".osparc" +else + OSPARC_ENV_PROMPT=$(basename "$OSPARC_ENV") +fi +export OSPARC_ENV_PROMPT + + +if [ -z "${OSPARC_ENV_DISABLE_PROMPT-}" ] ; then + _OLD_OSPARC_PS1="${PS1-}" + PS1="(${OSPARC_ENV_PROMPT}) ${PS1-}" + export PS1 +fi + +# Inform the user +echo "Environment activated. To deactivate, type 'deactivate'" diff --git a/.osparc/bin/ooil b/.osparc/bin/ooil new file mode 100755 index 0000000..fb85469 --- /dev/null +++ b/.osparc/bin/ooil @@ -0,0 +1,40 @@ +#!/bin/bash +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ + +set -o errexit +set -o nounset +set -o pipefail +IFS=$'\n\t' + +IMAGE_NAME="itisfoundation/service-integration:${OOIL_IMAGE_TAG:-master-github-latest}" +WORKDIR="$(pwd)" + +# +# NOTE: with --interactive --tty the command below will +# produce colors in the outputs. The problem is that +# . ooil >VERSION will insert special color codes +# . in the VERSION file which make it unusable as a variable +# . when cat VERSION !! 
+# + +run() { + docker run \ + --rm \ + --pull=always \ + --volume="/etc/group:/etc/group:ro" \ + --volume="/etc/passwd:/etc/passwd:ro" \ + --user="$(id --user "$USER")":"$(id --group "$USER")" \ + --volume "$WORKDIR":/src \ + --workdir=/src \ + "$IMAGE_NAME" \ + "$@" +} + +# ---------------------------------------------------------------------- +# MAIN +# +# USAGE +# ooil --help + +run "$@" +# ---------------------------------------------------------------------- diff --git a/.osparc/bin/yq b/.osparc/bin/yq new file mode 100755 index 0000000..99b2315 --- /dev/null +++ b/.osparc/bin/yq @@ -0,0 +1,17 @@ +#!/bin/bash +set -o errexit +set -o nounset +set -o pipefail +IFS=$'\n\t' + +# Define variables +YQ_IMAGE="mikefarah/yq" +YQ_VERSION="4.12.0" + +# Define function to run yq inside container +run_yq() { + docker run --rm -v "$(pwd):/workdir" "$YQ_IMAGE:$YQ_VERSION" "$@" +} + +# Call function with arguments +run_yq "$@" diff --git a/.osparc/docker-compose.overwrite.yml b/.osparc/docker-compose.overwrite.yml new file mode 100644 index 0000000..94fc9c5 --- /dev/null +++ b/.osparc/docker-compose.overwrite.yml @@ -0,0 +1,6 @@ +version: "3.7" +services: + file-format-converter: + build: + dockerfile: docker/python/Dockerfile + target: production diff --git a/.osparc/metadata.yml b/.osparc/metadata.yml new file mode 100644 index 0000000..487f6e1 --- /dev/null +++ b/.osparc/metadata.yml @@ -0,0 +1,37 @@ +name: File Format Converter +key: simcore/services/comp/file-format-converter +type: computational +integration-version: 1.0.0 +version: 0.1.0 +description: Module to allow conversion of files between various formats such as CSV, JSON, YAML, XLS, XLSX, NWB +contact: appukuttan.shailesh@gmail.com +thumbnail: https://github.com/ITISFoundation/osparc-assets/blob/cb43207b6be2f4311c93cd963538d5718b41a023/assets/default-thumbnail-cookiecutter-osparc-service.png?raw=true +authors: + - name: Shailesh Appukuttan + email: appukuttan.shailesh@gmail.com + affiliation: INT, Aix-Marseille 
University, France +inputs: + input_1: + displayOrder: 1 + label: source file + description: path of file to be converted + type: data:*/* + input_2: + displayOrder: 2 + label: target format + description: required format of the output file + type: ref_contentSchema + contentSchema: + title: target_format + default: json + enum: [csv, json, yaml, xls, xlsx, nwb, parquet, feather, pkl, xml, toml, h5, hdf5] + +outputs: + output_1: + displayOrder: 1 + label: converted file + description: path to the converted file + type: data:*/* + fileToKeyMap: + data.json: output_1 + diff --git a/.osparc/runtime.yml b/.osparc/runtime.yml new file mode 100644 index 0000000..c0d3d9f --- /dev/null +++ b/.osparc/runtime.yml @@ -0,0 +1,9 @@ +restart-policy: no-restart +settings: + - name: Resources + type: Resources + value: + Limits: + NanoCPUs: 1000000000 # 100% of CPU cycles on 1 CPU + MemoryBytes: 2147483648 # 2 Gigabytes + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8b4444f --- /dev/null +++ b/Makefile @@ -0,0 +1,181 @@ +# +# Author: Shailesh Appukuttan + +SHELL = /bin/sh +.DEFAULT_GOAL := help + +export VCS_URL := $(shell git config --get remote.origin.url 2> /dev/null || echo unversioned repo) +export VCS_REF := $(shell git rev-parse --short HEAD 2> /dev/null || echo unversioned repo) +export VCS_STATUS := $(if $(shell git status -s 2> /dev/null || echo unversioned repo),'modified/untracked','clean') +export BUILD_DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ") + +export DOCKER_REGISTRY ?= itisfoundation +export DOCKER_IMAGE_NAME ?= file-format-converter +export DOCKER_IMAGE_TAG ?= $(shell cat VERSION 2> /dev/null || echo undefined) + +export COMPOSE_INPUT_DIR := ./validation/input +export COMPOSE_OUTPUT_DIR := .tmp/output + +OSPARC_DIR:=$(CURDIR)/.osparc + +APP_NAME := file-format-converter + + + + +# INTEGRATION ----------------------------------------------------------------- +METADATA := .osparc/metadata.yml + + +.PHONY: VERSION +VERSION: $(METADATA) 
## generates VERSION from metadata + # updating $@ from $< + @$(OSPARC_DIR)/bin/ooil get-version --metadata-file $< > $@ + +service.cli/run: $(METADATA) ## generates run from metadata + # Updates adapter script from metadata in $< + @$(OSPARC_DIR)/bin/ooil run-creator --metadata $< --runscript $@ + +docker-compose.yml: $(METADATA) ## generates docker-compose + # Injects metadata from $< as labels + @$(OSPARC_DIR)/bin/ooil compose --to-spec-file $@ --metadata $< + + + +# BUILD ----------------------------------------------------------------- + +define _docker_compose_build +export DOCKER_BUILD_TARGET=$(if $(findstring -devel,$@),development,$(if $(findstring -cache,$@),cache,production)); \ + docker compose -f docker-compose.yml build $(if $(findstring -nc,$@),--no-cache,); +endef + + +.PHONY: build build-devel build-nc build-devel-nc +build build-devel build-nc build-devel-nc: VERSION docker-compose.yml service.cli/run ## builds image + # building image local/${DOCKER_IMAGE_NAME}... + @$(call _docker_compose_build) + +define show-meta + $(foreach iid,$(shell docker images */$(1):* -q | sort | uniq),\ + docker image inspect $(iid) | jq '.[0] | .RepoTags, .ContainerConfig.Labels, .Config.Labels';) +endef + + +.PHONY: info-build +info-build: ## displays info on the built image + # Built images + @docker images */$(DOCKER_IMAGE_NAME):* + # Tags and labels + @$(call show-meta,$(DOCKER_IMAGE_NAME)) + + +# TESTS----------------------------------------------------------------- +.PHONY: test tests +test tests: ## runs validation tests + @$(OSPARC_DIR)/bin/ooil test . 
+ + + +# PUBLISHING ----------------------------------------------------------------- + +.PHONY: version-service-patch version-service-minor version-service-major +version-service-patch version-service-minor version-service-major: $(METADATA) ## kernel/service versioning as patch + $(OSPARC_DIR)/bin/ooil bump-version --metadata-file $< --upgrade $(subst version-service-,,$@) + # syncing metadata upstream + @$(MAKE) VERSION + + +.PHONY: tag-local +tag-local: + docker tag ${DOCKER_REGISTRY}/${DOCKER_IMAGE_NAME}:$(if $(findstring version,$@),$(DOCKER_IMAGE_NAME),latest) local/$(DOCKER_IMAGE_NAME):production + +.PHONY: push push-force push-version push-latest pull-latest pull-version tag-latest tag-version +tag-latest tag-version: + docker tag simcore/services/comp/${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(if $(findstring version,$@),$(DOCKER_IMAGE_TAG),latest) + +version_valid = $(shell test $$(echo $(DOCKER_IMAGE_TAG) | cut --fields=1 --delimiter=.) -gt 0 > /dev/null && echo "image version is valid") +version_exists = $(shell DOCKER_CLI_EXPERIMENTAL=enabled docker manifest inspect $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) > /dev/null && echo "image already exists on $(DOCKER_REGISTRY)") +push push-force: ## pushes (resp. force) services to the registry if service not available in registry. 
+ @$(if $(findstring force,$@),,\ + $(if $(call version_valid),$(info version is valid), $(error $(DOCKER_IMAGE_TAG) is not a valid version (major>=1)))\ + $(if $(call version_exists),$(error $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) already exists on $(DOCKER_REGISTRY)), $(info no version found on $(DOCKER_REGISTRY)))\ + ) + @$(MAKE) push-version; + @$(MAKE) push-latest; + +.PHONY: publish-local +publish-local: ## push to local throw away registry to test integration + docker tag simcore/services/comp/${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} registry:5000/simcore/services/comp/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) + docker push registry:5000/simcore/services/comp/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) + @curl registry:5000/v2/_catalog | jq + +push-latest push-version: ## publish service to registry with latest/version tag + # pushing '$(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(if $(findstring version,$@),$(DOCKER_IMAGE_TAG),latest)'... + @$(MAKE) tag-$(subst push-,,$@) + @docker push $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(if $(findstring version,$@),$(DOCKER_IMAGE_TAG),latest) + # pushed '$(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(if $(findstring version,$@),$(DOCKER_IMAGE_TAG),latest)' + +pull-latest pull-version: ## pull service from registry + @docker pull $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(if $(findstring version,$@),$(DOCKER_IMAGE_TAG),latest) + + + + +# COOCKIECUTTER ----------------------------------------------------------------- + +.PHONY: replay +replay: .cookiecutterrc ## re-applies cookiecutter + # Replaying gh:ITISFoundation/cookiecutter-osparc-service ... 
+ @cookiecutter --no-input --overwrite-if-exists \ + --config-file=$< \ + --output-dir="$(abspath $(CURDIR)/..)" \ + "gh:ITISFoundation/cookiecutter-osparc-service" + + + +.PHONY: info +info: ## general info + # env vars: version control + @echo " VCS_URL : $(VCS_URL)" + @echo " VCS_REF : $(VCS_REF)" + @echo " VCS_STATUS : $(VCS_STATUS)" + # env vars: docker + @echo " DOCKER_REGISTRY : $(DOCKER_REGISTRY)" + @echo " DOCKER_IMAGE_NAME : $(DOCKER_IMAGE_NAME)" + @echo " DOCKER_IMAGE_TAG : $(DOCKER_IMAGE_TAG)" + @echo " COMPOSE_INPUT_DIR : $(COMPOSE_INPUT_DIR)" + @echo " COMPOSE_OUTPUT_DIR : $(COMPOSE_OUTPUT_DIR)" + @echo " BUILD_DATE : $(BUILD_DATE)" + # exe: recommended dev tools + @echo ' git : $(shell git --version 2>/dev/null || echo not found)' + @echo ' make : $(shell make --version 2>&1 | head -n 1)' + @echo ' jq : $(shell jq --version 2>/dev/null || echo not found z)' + @echo ' awk : $(shell awk -W version 2>&1 | head -n 1 2>/dev/null || echo not found)' + @echo ' python : $(shell python3 --version 2>/dev/null || echo not found )' + @echo ' docker : $(shell docker --version)' + @echo ' docker buildx : $(shell docker buildx version)' + @echo ' docker-compose : $(shell docker-compose --version)' + # exe: integration tools + @echo ' ooil version : $(shell $(OSPARC_DIR)/bin/ooil --version)' + + + +# MISC ----------------------------------------------------------------- + +.PHONY: help +help: ## this colorful help + @echo "Recipes for '$(notdir $(CURDIR))':" + @echo "" + @awk --posix 'BEGIN {FS = ":.*?## "} /^[[:alpha:][:space:]_-]+:.*?## / {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) + @echo "" + + +.PHONY: clean +git_clean_args = -dxf --exclude=.vscode/ + +clean: ## cleans all unversioned files in project and temp files create by this makefile + # Cleaning unversioned + @git clean -n $(git_clean_args) + @echo -n "Are you sure? [y/N] " && read ans && [ $${ans:-N} = y ] + @echo -n "$(shell whoami), are you REALLY sure? 
[y/N] " && read ans && [ $${ans:-N} = y ] + @git clean $(git_clean_args) diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..6c6aa7c --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.1.0 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6310223 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,31 @@ +version: '3.7' +services: + file-format-converter: + build: + context: ./ + dockerfile: docker/python/Dockerfile + labels: + io.simcore.name: '{"name":"File Format Converter"}' + io.simcore.thumbnail: '{"thumbnail":"https://github.com/ITISFoundation/osparc-assets/blob/cb43207b6be2f4311c93cd963538d5718b41a023/assets/default-thumbnail-cookiecutter-osparc-service.png?raw=true"}' + io.simcore.description: '{"description":"Module to allow conversion of files + between various formats such as CSV, JSON, YAML, XLS, XLSX, NWB"}' + io.simcore.key: '{"key":"simcore/services/comp/file-format-converter"}' + io.simcore.version: '{"version":"0.1.0"}' + io.simcore.integration-version: '{"integration-version":"1.0.0"}' + io.simcore.type: '{"type":"computational"}' + io.simcore.authors: '{"authors":[{"name":"Shailesh Appukuttan","email":"appukuttan.shailesh@gmail.com","affiliation":"INT, + Aix-Marseille University, France"}]}' + io.simcore.contact: '{"contact":"appukuttan.shailesh@gmail.com"}' + io.simcore.inputs: '{"inputs":{"input_1":{"displayOrder":1.0,"label":"source + file","description":"path of file to be converted","type":"data:*/*"},"input_2":{"displayOrder":2.0,"label":"target + format","description":"required format of the output file","type":"ref_contentSchema","contentSchema":{"title":"target_format","default":"json","enum":["csv","json","yaml","xls","xlsx","nwb","parquet","feather","pkl","xml","toml","h5","hdf5"]}}}}' + io.simcore.outputs: '{"outputs":{"output_1":{"displayOrder":1.0,"label":"converted + file","description":"path to the converted 
file","type":"data:*/*","fileToKeyMap":{"data.json":"output_1"}}}}' + org.label-schema.build-date: 2024-08-11T20:55:08+0000 + org.label-schema.schema-version: '1.0' + org.label-schema.vcs-ref: '' + org.label-schema.vcs-url: '' + simcore.service.restart-policy: no-restart + simcore.service.settings: '[{"name":"Resources","type":"Resources","value":{"Limits":{"NanoCPUs":1000000000,"MemoryBytes":2147483648}}}]' + target: production + image: simcore/services/comp/file-format-converter:0.1.0 diff --git a/docker/python/Dockerfile b/docker/python/Dockerfile new file mode 100644 index 0000000..3109a78 --- /dev/null +++ b/docker/python/Dockerfile @@ -0,0 +1,113 @@ +FROM python:3.11 as base +# +# USAGE: +# cd services/file-format-converter +# docker build -f Dockerfile -t file-format-converter:prod --target production ../../ +# docker run file-format-converter:prod +# + +LABEL maintainer=appukuttan-shailesh + +# simcore-user uid=8004(${SC_USER_NAME}) gid=8004(${SC_USER_NAME}) groups=8004(${SC_USER_NAME}) +ENV SC_USER_ID 8004 +ENV SC_USER_NAME scu +RUN adduser --uid ${SC_USER_ID} --disabled-password --gecos "" --shell /bin/sh --home /home/${SC_USER_NAME} ${SC_USER_NAME} + +RUN apt-get update \ + && apt-get -y install --no-install-recommends \ + python3-venv \ + python3-setuptools \ + gzip \ + git \ + jq \ + && pip3 install --upgrade pip \ + && rm --recursive --force /var/lib/apt/lists/* + +# -------------------------- Build stage ------------------- +# Installs build/package management tools and third party dependencies +# +# + /build WORKDIR +# + +FROM base as build + +ENV SC_BUILD_TARGET build + +# ------------------------------------------------------------------------------------ +#TODO: +# uncomment and adapt if build dependencies shall be installed +#RUN apt-get update \ +# && apt-get -y install --no-install-recommends \ +# gcc \ +# git \ +# && rm -rf /var/lib/apt/lists/* + +# uncomment and adapt if python necessary +#RUN $SC_PIP install --upgrade pip wheel setuptools 
+RUN pip install --upgrade pip wheel setuptools + +# ------------------------------------------------------------------------------------ + +WORKDIR /build +# defines the output of the build +RUN mkdir --parents /build/bin +# copy src code +COPY --chown=${SC_USER_NAME}:${SC_USER_NAME} src/file_format_converter src/file_format_converter +# ------------------------------------------------------------------------------------ +#TODO: +# uncomment and adapt if build steps are necessary +RUN cp -R src/file_format_converter/* /build/bin +# ------------------------------------------------------------------------------------ + +# --------------------------Production stage ------------------- +# Final cleanup up to reduce image size and startup setup +# Runs as ${SC_USER_NAME} (non-root user) +# +# + /home/${SC_USER_NAME} $HOME = WORKDIR +# + file_format_converter [${SC_USER_NAME}:${SC_USER_NAME}] +# + docker [${SC_USER_NAME}:${SC_USER_NAME}] +# + service.cli [${SC_USER_NAME}:${SC_USER_NAME}] +# +FROM base as production + +ENV SC_BUILD_TARGET production +ENV SC_BOOT_MODE production + + +ENV INPUT_FOLDER="/input" \ + OUTPUT_FOLDER="/output" + + +WORKDIR /home/${SC_USER_NAME} + +# ------------------------------------------------------------------------------------ +#TODO: +# uncomment and adapt to install runtime dependencies +#RUN apt-get update \ +# && apt-get -y install --no-install-recommends \ +# && rm -rf /var/lib/apt/lists/* +# ------------------------------------------------------------------------------------ + +# copy docker bootup scripts +COPY --chown=${SC_USER_NAME}:${SC_USER_NAME} docker/python/*.sh docker/ +# copy simcore service cli +COPY --chown=${SC_USER_NAME}:${SC_USER_NAME} service.cli/ service.cli/ +# necessary to be able to call run directly without sh in front +ENV PATH="/home/${SC_USER_NAME}/service.cli:${PATH}" + +# copy binaries from build +COPY --from=build --chown=${SC_USER_NAME}:${SC_USER_NAME} /build/bin file_format_converter +RUN pip install 
--upgrade pip wheel setuptools pandas pyyaml h5py pyarrow toml xlrd==1.2.0 + +# ------------------------------------------------------------------------------------ +#TODO: +# uncomment and provide a healtchecker if possible +# HEALTHCHECK --interval=30s \ +# --timeout=120s \ +# --start-period=30s \ +# --retries=3 \ +# CMD ["healthchecker app"] +# ------------------------------------------------------------------------------------ + +ENTRYPOINT [ "/bin/sh", "docker/entrypoint.sh", "/bin/sh", "-c" ] +CMD ["run"] diff --git a/docker/python/entrypoint.sh b/docker/python/entrypoint.sh new file mode 100755 index 0000000..b29fd56 --- /dev/null +++ b/docker/python/entrypoint.sh @@ -0,0 +1,73 @@ +#!/bin/sh +set -o errexit +set -o nounset + +IFS=$(printf '\n\t') +# This entrypoint script: +# +# - Executes *inside* of the container upon start as --user [default root] +# - Notice that the container *starts* as --user [default root] but +# *runs* as non-root user [$SC_USER_NAME] +# +echo Entrypoint for stage "${SC_BUILD_TARGET}" ... +echo User : "$(id "$(whoami)")" +echo Workdir : "$(pwd)" + + +# expect input/output folders to be mounted +stat "${INPUT_FOLDER}" > /dev/null 2>&1 || \ + (echo "ERROR: You must mount '${INPUT_FOLDER}' to deduce user and group ids" && exit 1) +stat "${OUTPUT_FOLDER}" > /dev/null 2>&1 || \ + (echo "ERROR: You must mount '${OUTPUT_FOLDER}' to deduce user and group ids" && exit 1) + +# NOTE: expects docker run ... -v /path/to/input/folder:${INPUT_FOLDER} +# check input/output folders are owned by the same user +if [ "$(stat -c %u "${INPUT_FOLDER}")" -ne "$(stat -c %u "${OUTPUT_FOLDER}")" ] +then + echo "ERROR: '${INPUT_FOLDER}' and '${OUTPUT_FOLDER}' have different user id's. not allowed" && exit 1 +fi +# check input/outputfolders are owned by the same group +if [ "$(stat -c %g "${INPUT_FOLDER}")" -ne "$(stat -c %g "${OUTPUT_FOLDER}")" ] +then + echo "ERROR: '${INPUT_FOLDER}' and '${OUTPUT_FOLDER}' have different group id's. 
not allowed" && exit 1 +fi + +echo "setting correct user id/group id..." +HOST_USERID=$(stat --format=%u "${INPUT_FOLDER}") +HOST_GROUPID=$(stat --format=%g "${INPUT_FOLDER}") +CONT_GROUPNAME=$(getent group "${HOST_GROUPID}" | cut --delimiter=: --fields=1) +if [ "$HOST_USERID" -eq 0 ] +then + echo "Warning: Folder mounted owned by root user... adding $SC_USER_NAME to root..." + adduser "$SC_USER_NAME" root +else + echo "Folder mounted owned by user $HOST_USERID:$HOST_GROUPID-'$CONT_GROUPNAME'..." + # take host's credentials in $SC_USER_NAME + if [ -z "$CONT_GROUPNAME" ] + then + echo "Creating new group my$SC_USER_NAME" + CONT_GROUPNAME=my$SC_USER_NAME + addgroup --gid "$HOST_GROUPID" "$CONT_GROUPNAME" + else + echo "group already exists" + fi + echo "adding $SC_USER_NAME to group $CONT_GROUPNAME..." + adduser "$SC_USER_NAME" "$CONT_GROUPNAME" + + echo "changing $SC_USER_NAME:$SC_USER_NAME ($SC_USER_ID:$SC_USER_ID) to $SC_USER_NAME:$CONT_GROUPNAME ($HOST_USERID:$HOST_GROUPID)" + usermod --uid "$HOST_USERID" --gid "$HOST_GROUPID" "$SC_USER_NAME" + + echo "Changing group properties of files around from $SC_USER_ID to group $CONT_GROUPNAME" + find / \( -path /proc -o -path /sys \) -prune -o -group "$SC_USER_ID" -exec chgrp --no-dereference "$CONT_GROUPNAME" {} \; + # change user property of files already around + echo "Changing ownership properties of files around from $SC_USER_ID to group $CONT_GROUPNAME" + find / \( -path /proc -o -path /sys \) -prune -o -user "$SC_USER_ID" -exec chown --no-dereference "$SC_USER_NAME" {} \; +fi + +echo "Starting $* ..." 
+echo " $SC_USER_NAME rights : $(id "$SC_USER_NAME")" +echo " local dir : $(ls -al)" +echo " input dir : $(ls -al "${INPUT_FOLDER}")" +echo " output dir : $(ls -al "${OUTPUT_FOLDER}")" + +su --command "export PATH=${PATH}:/home/$SC_USER_NAME/service.cli; $*" "$SC_USER_NAME" \ No newline at end of file diff --git a/service.cli/execute.sh b/service.cli/execute.sh new file mode 100755 index 0000000..8f5ec88 --- /dev/null +++ b/service.cli/execute.sh @@ -0,0 +1,58 @@ +#!/bin/sh +# set sh strict mode +set -o errexit +set -o nounset +IFS=$(printf '\n\t') + +cd /home/scu/file_format_converter + +echo "starting service as" +echo User : "$(id "$(whoami)")" +echo Workdir : "$(pwd)" +echo Python Version: "$(python --version)" +echo "..." +echo +# ---------------------------------------------------------------- +# This script shall be modified according to the needs in order to run the service +# The inputs defined in ${INPUT_FOLDER}/inputs.json are available as env variables by their key in capital letters +# For example: input_1 -> $INPUT_1 + +# put the code to execute the service here +# For example: +# env +echo "Input folder content:" +ls -al "${INPUT_FOLDER}" +echo "INPUT_1:", $INPUT_1 +echo "INPUT_2:", $INPUT_2 +# echo "TEST DATA" > $OUTPUT_FOLDER/outputs_pre.txt + +# from the list of files in the input folder, get the first one not named 'inputs.json' +# and use it as the input file +INPUT_FILE=$(ls -1 ${INPUT_FOLDER} | grep -v 'inputs.json' | head -n 1) +echo "INPUT_FILE:", $INPUT_FILE + +python main.py $INPUT_FOLDER/$INPUT_FILE $INPUT_2 + +# then retrieve the output and move it to the $OUTPUT_FOLDER +# as defined in the output labels +# For example: cp output.csv $OUTPUT_FOLDER or to $OUTPUT_FOLDER/outputs.json using jq +echo "Input folder content:" +ls -al "${INPUT_FOLDER}" +echo "Output folder content:" +ls -al "${OUTPUT_FOLDER}" +echo "PWD folder content:" +ls -al + +# display the contents of log_output_filepath.txt +echo "Content of log_output_filepath.txt:" 
"""Convert a data file between formats (CSV, JSON, YAML, XLS(X), HDF5, NWB, ...).

Heavy optional dependencies (yaml, h5py, toml) are imported lazily inside the
branch that needs them, so the converter runs even when only a subset of
formats is installed/exercised.
"""
import argparse
import json
import os
import pickle
import xml.etree.ElementTree as ET

import pandas as pd


class FileConverter:
    """Load an input file into memory and write it back out in another format.

    The input format is deduced from the file extension at construction time;
    the output format is deduced from the extension of the path passed to
    :meth:`write_file` / :meth:`convert`.
    """

    def __init__(self, input_file):
        self.input_file = input_file
        self.input_format = self._get_file_format(input_file)
        self.data = None  # populated by read_file()

    def _get_file_format(self, filename):
        """Return the lower-cased extension of *filename* without the dot.

        Raises:
            ValueError: if *filename* has no extension (previously the whole
                basename was silently returned, producing a confusing
                "Unsupported file format" error later).
        """
        ext = os.path.splitext(filename)[1]
        if not ext:
            raise ValueError(f"Cannot determine format: '{filename}' has no extension")
        return ext[1:].lower()

    def read_file(self):
        """Load ``self.input_file`` into ``self.data`` according to its extension.

        Tabular formats become a pandas DataFrame; json/yaml/toml become plain
        Python containers; xml becomes an ElementTree root; nwb/hdf5 raw data.
        """
        fmt = self.input_format
        if fmt == 'csv':
            self.data = pd.read_csv(self.input_file)
        elif fmt == 'xls':
            # xlrd >= 2.0 dropped .xls support; the image pins xlrd==1.2.0
            self.data = pd.read_excel(self.input_file, engine='xlrd')
        elif fmt == 'xlsx':
            self.data = pd.read_excel(self.input_file)
        elif fmt == 'json':
            with open(self.input_file, 'r') as f:
                self.data = json.load(f)
        elif fmt == 'yaml':
            import yaml  # optional dependency, loaded on demand
            with open(self.input_file, 'r') as f:
                self.data = yaml.safe_load(f)
        elif fmt in ('h5', 'hdf5'):
            self.data = pd.read_hdf(self.input_file)
        elif fmt == 'parquet':
            self.data = pd.read_parquet(self.input_file)
        elif fmt == 'feather':
            self.data = pd.read_feather(self.input_file)
        elif fmt == 'pkl':
            # SECURITY: pickle.load executes arbitrary code during
            # deserialization -- only ever feed this trusted files.
            with open(self.input_file, 'rb') as f:
                self.data = pickle.load(f)
        elif fmt == 'xml':
            self.data = ET.parse(self.input_file).getroot()
        elif fmt == 'toml':
            import toml  # optional dependency, loaded on demand
            with open(self.input_file, 'r') as f:
                self.data = toml.load(f)
        elif fmt == 'nwb':
            import h5py  # optional dependency, loaded on demand
            # NOTE(review): assumes the NWB file exposes a top-level 'data'
            # dataset; real NWB files are more structured -- confirm with callers.
            with h5py.File(self.input_file, 'r') as f:
                self.data = f['data'][:]
        else:
            raise ValueError(f"Unsupported file format: {fmt}")

    def write_file(self, output_file):
        """Write ``self.data`` to *output_file*, format chosen from its extension.

        Never mutates ``self.data`` (the previous implementation rebound it to
        a list of dicts when writing json/yaml, corrupting the in-memory state
        for any subsequent write or preview).
        """
        output_format = self._get_file_format(output_file)

        if output_format == 'csv':
            self.data.to_csv(output_file, index=False)
        elif output_format in ('xls', 'xlsx'):
            self.data.to_excel(output_file, index=False)
        elif output_format == 'json':
            data = self.data
            if isinstance(data, pd.DataFrame):
                data = data.to_dict(orient='records')  # local copy, self.data untouched
            with open(output_file, 'w') as f:
                json.dump(data, f, indent=4)
        elif output_format == 'yaml':
            import yaml  # optional dependency, loaded on demand
            data = self.data
            if isinstance(data, pd.DataFrame):
                data = data.to_dict(orient='records')  # local copy, self.data untouched
            with open(output_file, 'w') as f:
                yaml.dump(data, f, default_flow_style=False)
        elif output_format in ('h5', 'hdf5'):
            self.data.to_hdf(output_file, key='df', mode='w')
        elif output_format == 'parquet':
            self.data.to_parquet(output_file)
        elif output_format == 'feather':
            self.data.to_feather(output_file)
        elif output_format == 'pkl':
            with open(output_file, 'wb') as f:
                pickle.dump(self.data, f)
        elif output_format == 'xml':
            ET.ElementTree(self.data).write(output_file)
        elif output_format == 'toml':
            import toml  # optional dependency, loaded on demand
            with open(output_file, 'w') as f:
                toml.dump(self.data, f)
        elif output_format == 'nwb':
            import h5py  # optional dependency, loaded on demand
            with h5py.File(output_file, 'w') as f:
                f.create_dataset('data', data=self.data)
        else:
            raise ValueError(f"Unsupported output file format: {output_format}")

    def preview(self, num_rows=5):
        """Return the first *num_rows* rows for tabular data, else the raw data."""
        if isinstance(self.data, pd.DataFrame):
            return self.data.head(num_rows)
        return self.data

    def transform(self, transformation_func):
        """Apply *transformation_func* to the loaded DataFrame in place.

        Raises:
            ValueError: if the loaded data is not tabular.
        """
        if not isinstance(self.data, pd.DataFrame):
            raise ValueError("Data transformations are only supported for tabular data formats.")
        self.data = transformation_func(self.data)

    def convert(self, output_file):
        """Read the input file and write it out as *output_file*.

        Also records the produced path in ``log_output_filepath.txt`` (in the
        current working directory) -- service.cli/execute.sh reads this file
        to locate the output, so the filename must not change.
        """
        self.read_file()
        self.write_file(output_file)
        print(f"Converted {self.input_file} to {output_file} successfully.")
        with open('log_output_filepath.txt', 'w') as f:
            f.write(output_file)


if __name__ == "__main__":
    # CLI: first argument is the input file, second the desired output format.
    parser = argparse.ArgumentParser(description="Convert a file to another format")
    parser.add_argument('input_file', help='Input file to convert')
    parser.add_argument('output_format', help='Output file format')
    args = parser.parse_args()

    input_file = args.input_file
    # jq (without -r) hands the value over wrapped in double quotes; strip them.
    # strip() also tolerates an empty argument instead of raising IndexError.
    output_format = args.output_format.strip().strip('"')

    # output file name = input basename (sans path/extension) + new extension
    output_file = f"{os.path.splitext(os.path.basename(input_file))[0]}.{output_format}"
    print(f"Converting {input_file} to {output_format} to produce {output_file}")

    converter = FileConverter(input_file)
    converter.convert(output_file)
b/validation/input/data4.csv new file mode 100644 index 0000000..0260703 --- /dev/null +++ b/validation/input/data4.csv @@ -0,0 +1,11 @@ +Provider Number,Hospital Name,Address,City,State,ZIP Code,County,Phone Number,Hospital Type,Hospital Ownership,Emergency Services +10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,3347938701,Acute Care Hospitals,Government - Hospital District or Authority,Yes +10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL,2565938310,Acute Care Hospitals,Government - Hospital District or Authority,Yes +10006,ELIZA COFFEE MEMORIAL HOSPITAL,205 MARENGO STREET,FLORENCE,AL,35631,LAUDERDALE,2567688400,Acute Care Hospitals,Government - Hospital District or Authority,Yes +10007,MIZELL MEMORIAL HOSPITAL,702 N MAIN ST,OPP,AL,36467,COVINGTON,3344933541,Acute Care Hospitals,Voluntary non-profit - Private,Yes +10008,CRENSHAW COMMUNITY HOSPITAL,101 HOSPITAL CIRCLE,LUVERNE,AL,36049,CRENSHAW,3343353374,Acute Care Hospitals,Proprietary,Yes +10009,HARTSELLE MEDICAL CENTER,201 PINE STREET NORTHWEST,HARTSELLE,AL,35640,MORGAN,2567736511,Acute Care Hospitals,Proprietary,Not Available +10010,MARSHALL MEDICAL CENTER NORTH,8000 ALABAMA HIGHWAY 69,GUNTERSVILLE,AL,35976,MARSHALL,2565718000,Acute Care Hospitals,Government - Hospital District or Authority,Yes +10011,ST VINCENT'S EAST,50 MEDICAL PARK EAST DRIVE,BIRMINGHAM,AL,35235,JEFFERSON,2058383122,Acute Care Hospitals,Voluntary non-profit - Private,Yes +10012,DEKALB REGIONAL MEDICAL CENTER,200 MED CENTER DRIVE,FORT PAYNE,AL,35968,DE KALB,2568453150,Acute Care Hospitals,Proprietary,Yes +10015,SOUTHWEST ALABAMA MEDICAL CENTER,33700 HIGHWAY 43,THOMASVILLE,AL,36784,CLARKE,3346366221,Acute Care Hospitals,Proprietary,Not Available diff --git a/validation/input/inputs.json b/validation/input/inputs.json new file mode 100644 index 0000000..4339902 --- /dev/null +++ b/validation/input/inputs.json @@ -0,0 +1,4 @@ +{ + "input_1": "some_stuff", + "input_2": 
"json" +}