diff --git a/.gitignore b/.gitignore index b1bf70e72..032ba600d 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ share/python-wheels/ MANIFEST venv .history +.conda_env_created diff --git a/Dockerfile.moeflow_worker b/Dockerfile.moeflow_worker index 47ca8e446..a3177c39e 100644 --- a/Dockerfile.moeflow_worker +++ b/Dockerfile.moeflow_worker @@ -1,9 +1,11 @@ -# FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime -FROM continuumio/miniconda3 +FROM mambaorg/micromamba:2-debian12-slim -RUN apt update \ - && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata build-essential python3-opencv +COPY ./conda.yaml /tmp/conda.yaml +RUN --mount=type=cache,uid=57439,gid=57439,target=/opt/conda/pkgs micromamba env create --yes --file /tmp/conda.yaml +# RUN apt update \ +# && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata build-essential +COPY --chown=mambauser . /app WORKDIR /app -COPY . /app -RUN --mount=type=cache,target=/root/.conda conda env update --prune --file conda.yaml -RUN conda run -n mit-py311 --no-capture-output python3 docker_prepare.py +RUN micromamba run -n mit-py311 python3 -mvenv --system-site-packages venv +RUN venv/bin/pip install -r requirements-moeflow.txt +RUN venv/bin/python docker_prepare.py --models ocr.48px,ocr.48px_ctc,ocr.32px,ocr.mocr diff --git a/Makefile b/Makefile index 1a119d7ec..bd3b56a54 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ .PHONY: default +CONDA_ENV = mit-py311 +CONDA_YML ?= conda.yaml + default: @echo Please use other targets -conda-env: - conda env update --prune --file conda.yaml - run-worker: conda run -n mit-py311 --no-capture-output celery --app moeflow_worker worker --queues mit --loglevel=debug --concurrency=1 @@ -25,3 +25,15 @@ run-web-server: --use-gpu \ --host=0.0.0.0 \ --port=5003 + +install-venv-deps: + venv/bin/pip install -r requirements-moeflow.txt + +conda-venv: .conda_env_created # alt to `venv/.venv_created` target, but uses conda python to create venv + 
micromamba run --attach '' -n $(CONDA_ENV) python3 -mvenv --system-site-packages ./venv + touch venv/.venv_created + +.conda_env_created: $(CONDA_YML) + # setup conda environment AND env-wise deps + micromamba env create -n $(CONDA_ENV) --yes -f $(CONDA_YML) + touch $@ diff --git a/conda.yaml b/conda.yaml index 3c135a3de..774f4d4ab 100644 --- a/conda.yaml +++ b/conda.yaml @@ -1,132 +1,12 @@ name: mit-py311 channels: - - pytorch - - nvidia - - defaults +- conda-forge +- pytorch +- nvidia dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - blas=1.0=mkl - - brotli-python=1.0.9=py311h6a678d5_7 - - bzip2=1.0.8=h5eee18b_5 - - ca-certificates=2024.3.11=h06a4308_0 - - certifi=2024.2.2=py311h06a4308_0 - - charset-normalizer=2.0.4=pyhd3eb1b0_0 - - cuda-cudart=12.1.105=0 - - cuda-cupti=12.1.105=0 - - cuda-libraries=12.1.0=0 - - cuda-nvrtc=12.1.105=0 - - cuda-nvtx=12.1.105=0 - - cuda-opencl=12.4.127=0 - - cuda-runtime=12.1.0=0 - - ffmpeg=4.3=hf484d3e_0 - - filelock=3.13.1=py311h06a4308_0 - - freetype=2.12.1=h4a9f257_0 - - gmp=6.2.1=h295c915_3 - - gmpy2=2.1.2=py311hc9b5ff0_0 - - gnutls=3.6.15=he1e5248_0 - - idna=3.4=py311h06a4308_0 - - intel-openmp=2023.1.0=hdb19cb5_46306 - - jinja2=3.1.3=py311h06a4308_0 - - jpeg=9e=h5eee18b_1 - - lame=3.100=h7b6447c_0 - - lcms2=2.12=h3be6417_0 - - ld_impl_linux-64=2.38=h1181459_1 - - lerc=3.0=h295c915_0 - - libcublas=12.1.0.26=0 - - libcufft=11.0.2.4=0 - - libcufile=1.9.1.3=0 - - libcurand=10.3.5.147=0 - - libcusolver=11.4.4.55=0 - - libcusparse=12.0.2.55=0 - - libdeflate=1.17=h5eee18b_1 - - libffi=3.4.4=h6a678d5_0 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libiconv=1.16=h7f8727e_2 - - libidn2=2.3.4=h5eee18b_0 - - libjpeg-turbo=2.0.0=h9bf148f_0 - - libnpp=12.0.2.50=0 - - libnvjitlink=12.1.105=0 - - libnvjpeg=12.1.1.14=0 - - libpng=1.6.39=h5eee18b_0 - - libstdcxx-ng=11.2.0=h1234567_1 - - libtasn1=4.19.0=h5eee18b_0 - - libtiff=4.5.1=h6a678d5_0 - - libunistring=0.9.10=h27cfd23_0 - - 
libuuid=1.41.5=h5eee18b_0 - - libwebp-base=1.3.2=h5eee18b_0 - - llvm-openmp=14.0.6=h9e868ea_0 - - lz4-c=1.9.4=h6a678d5_0 - - markupsafe=2.1.3=py311h5eee18b_0 - - mkl=2023.1.0=h213fc3f_46344 - - mkl-service=2.4.0=py311h5eee18b_1 - - mkl_fft=1.3.8=py311h5eee18b_0 - - mkl_random=1.2.4=py311hdb19cb5_0 - - mpc=1.1.0=h10f8cd9_1 - - mpfr=4.0.2=hb69a4c5_1 - - mpmath=1.3.0=py311h06a4308_0 - - ncurses=6.4=h6a678d5_0 - - nettle=3.7.3=hbbd107a_1 - - networkx=3.1=py311h06a4308_0 - - numpy=1.26.4=py311h08b1b3b_0 - - numpy-base=1.26.4=py311hf175353_0 - - openh264=2.1.1=h4ff587b_0 - - openjpeg=2.4.0=h3ad879b_0 - - openssl=3.0.13=h7f8727e_0 - - pillow=10.2.0=py311h5eee18b_0 - - pip=23.3.1=py311h06a4308_0 - - pysocks=1.7.1=py311h06a4308_0 - - python=3.11.8=h955ad1f_0 - - pytorch=2.2.2=py3.11_cuda12.1_cudnn8.9.2_0 - - pytorch-cuda=12.1=ha16c6d3_5 - - pytorch-mutex=1.0=cuda - - pyyaml=6.0.1=py311h5eee18b_0 - - readline=8.2=h5eee18b_0 - - requests=2.31.0=py311h06a4308_1 - - setuptools=68.2.2=py311h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - sympy=1.12=py311h06a4308_0 - - tbb=2021.8.0=hdb19cb5_0 - - tk=8.6.12=h1ccaba5_0 - - torchaudio=2.2.2=py311_cu121 - - torchtriton=2.2.0=py311 - - torchvision=0.17.2=py311_cu121 - - typing_extensions=4.9.0=py311h06a4308_1 - - tzdata=2024a=h04d1e81_0 - - urllib3=2.1.0=py311h06a4308_1 - - wheel=0.41.2=py311h06a4308_0 - - xz=5.4.6=h5eee18b_0 - - yaml=0.2.5=h7b6447c_0 - - zlib=1.2.13=h5eee18b_0 - - zstd=1.5.5=hc292b87_0 - - pip: - - pydensecrf@https://github.com/lucasb-eyer/pydensecrf/archive/refs/heads/master.zip - - opencv-python-headless - - numpy==1.26.4 - - celery==5.3.6 - - celery[redis]==5.3.6 - - asgiref - - aiohttp - - omegaconf==2.3.0 - - langcodes - - marshmallow - - colorama - - tqdm - - einops - - shapely - - scikit-image - - pyclipper - - python-dotenv - - py3langid - - timm - - redis - - manga-ocr - - pymongo - - pandas - - onnxruntime - - openai==1.35.9 - # - httpcore==0.9.1 - # - httpx==0.13.3 - # - googletrans==4.0.0rc1 - +- python==3.11 
+- pytorch==2.2.2 +- torchvision==0.17.2 +- torchaudio==2.2.2 +- pytorch-cuda=12.1 +- numpy<2 diff --git a/docker_prepare.py b/docker_prepare.py index 3a6e79cc2..005a3f4b4 100644 --- a/docker_prepare.py +++ b/docker_prepare.py @@ -1,28 +1,55 @@ import asyncio - +from argparse import ArgumentParser from manga_translator.utils import ModelWrapper from manga_translator.detection import DETECTORS from manga_translator.ocr import OCRS from manga_translator.inpainting import INPAINTERS + +arg_parser = ArgumentParser() +arg_parser.add_argument("--models", default="") +arg_parser.add_argument("--continue-on-error", action="store_true") + + +cli_args = arg_parser.parse_args() + + async def download(dict): - for key, value in dict.items(): - if issubclass(value, ModelWrapper): - print(' -- Downloading', key) - try: - inst = value() - await inst.download() - except Exception as e: - print('Failed to download', key, value) - print(e) + """Download each ModelWrapper subclass in the mapping; re-raise on failure unless --continue-on-error is set.""" + for key, value in dict.items(): + if issubclass(value, ModelWrapper): + print(" -- Downloading", key) + try: + inst = value() + await inst.download() + except Exception as e: + print("Failed to download", key, value) + print(e) + if not cli_args.continue_on_error: + raise + async def main(): - await download(DETECTORS) - await download(OCRS) - await download({ - k: v for k, v in INPAINTERS.items() - if k not in ['sd'] - }) - -if __name__ == '__main__': - asyncio.run(main()) + models: set[str] = set(filter(None, cli_args.models.split(","))) + # print("parsed.models", models) + await download( + { + k: v + for k, v in DETECTORS.items() + if (not models) or (f"detector.{k}" in models) + } + ) + await download( + {k: v for k, v in OCRS.items() if (not models) or (f"ocr.{k}" in models)} + ) + await download( + { + k: v + for k, v in INPAINTERS.items() + if ((not models) or (f"inpaint.{k}" in models)) and (k not in ["sd"]) + } + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/requirements-moeflow.txt
b/requirements-moeflow.txt new file mode 100644 index 000000000..d6664d7ba --- /dev/null +++ b/requirements-moeflow.txt @@ -0,0 +1,52 @@ +# vim: ft=requirements +# networkx +# torch +# groq +# torchvision +# torch-summary +einops +scikit-image +opencv-python +pyclipper +shapely +# requests +# cryptography +# freetype-py +# aiohttp +tqdm +# deepl +# ImageHash +# kornia +# backports.cached-property +# huggingface_hub +# transformers +py3langid==0.2.2 +# sentencepiece +# editdistance +# numpy==1.26.4 +# tensorboardX +# websockets +# protobuf +# ctranslate2 +colorama +# openai==1.35.9 +# open_clip_torch +# safetensors +pandas +onnxruntime==1.18.1 +timm +omegaconf +python-dotenv +# nest-asyncio +# marshmallow +# cython +# aioshutil +# aiofiles +# arabic-reshaper +# pyhyphen +# langcodes +manga-ocr +# langdetect +# pydensecrf@https://github.com/lucasb-eyer/pydensecrf/archive/refs/heads/master.zip +# accelerate +# bitsandbytes