switch to micromamba #6

Merged · 7 commits · Nov 16, 2024
1 change: 1 addition & 0 deletions .gitignore
@@ -40,3 +40,4 @@ share/python-wheels/
MANIFEST
venv
.history
.conda_env_created
16 changes: 9 additions & 7 deletions Dockerfile.moeflow_worker
@@ -1,9 +1,11 @@
# FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
FROM continuumio/miniconda3
FROM mambaorg/micromamba:2-debian12-slim

RUN apt update \
&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata build-essential python3-opencv
COPY ./conda.yaml /tmp/conda.yaml
RUN --mount=type=cache,uid=57439,gid=57439,target=/opt/conda/pkgs micromamba env create --yes --file /tmp/conda.yaml
# RUN apt update \
# && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata build-essential
COPY --chown=mambauser . /app
Comment on lines +5 to +6
💡 Codebase verification

Keep system packages - they are required for custom compilation

The packages should not be commented out as they are necessary:

  • Found C++ source files (custom_ctc.cc, custom_ctc_cuda_driver.cc) in training/ocr/ that require compilation
  • build-essential is needed for compiling these custom CTC (Connectionist Temporal Classification) implementations
  • tzdata is likely needed for proper timezone handling in the application (multiple Python files use datetime)
  • These packages are not provided by conda.yaml which only includes Python and PyTorch dependencies
  • The micromamba base image (debian12-slim) is minimal and doesn't include these by default
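
If those packages are to be kept on the new base image, one option is to install them as root before dropping back to the unprivileged user — a sketch only; `MAMBA_USER` is the build-arg/user convention of the `mambaorg/micromamba` images, and the exact package set should be confirmed against the custom CTC build:

```dockerfile
FROM mambaorg/micromamba:2-debian12-slim

# Install the system packages the custom CTC compilation and timezone
# handling need, then drop back to the unprivileged image user.
USER root
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
       apt-get install -y --no-install-recommends tzdata build-essential \
    && rm -rf /var/lib/apt/lists/*
USER $MAMBA_USER
```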
🔗 Analysis chain

Verify removal of system packages

The commented-out system packages (tzdata, build-essential) might still be required. Please verify:

  1. Are these packages now provided by the micromamba base image?
  2. Are they installed through conda.yaml instead?
  3. Are they no longer needed in the new setup?

Let me gather more information about the base image and conda environment setup.


Let me check one final thing about the base image and any potential custom compilation needs.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Check if these packages are specified in conda.yaml
echo "Checking conda.yaml for system packages..."
rg -i "tzdata|build-essential" conda.yaml

# Check if any build commands or timezone operations exist
echo "Checking for build or timezone dependencies..."
rg -l "datetime|pytz|compile|build" .

Length of output: 989


Script:

#!/bin/bash
# Check base image and conda environment setup
echo "Checking Dockerfile base image and conda setup..."
head -n 20 Dockerfile.moeflow_worker

echo "Checking conda.yaml dependencies..."
cat conda.yaml

echo "Checking if there are any compile/build steps in Makefile..."
cat Makefile

Length of output: 2020


Script:

#!/bin/bash
# Check if there are any custom compilation needs
echo "Checking for C/C++ files that might need compilation..."
fd -e cc -e cpp -e c

# Check base image details
echo "Checking if base image already includes build tools..."
docker run --rm mambaorg/micromamba:2-debian12-slim dpkg -l | grep -E "build-essential|tzdata"

Length of output: 499

WORKDIR /app
COPY . /app
RUN --mount=type=cache,target=/root/.conda conda env update --prune --file conda.yaml
RUN conda run -n mit-py311 --no-capture-output python3 docker_prepare.py
RUN micromamba run -n mit-py311 python3 -mvenv --system-site-packages venv
RUN venv/bin/pip install -r requirements-moeflow.txt
RUN venv/bin/python docker_prepare.py --models ocr.48px,ocr.48px_ctc,ocr.32px,ocr.mocr
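
The last three `RUN` steps layer pip-only dependencies on top of the conda environment by creating a venv with `--system-site-packages`, so packages micromamba resolved (PyTorch, CUDA) remain importable while `requirements-moeflow.txt` adds the rest. The flag's effect can be sketched with the stdlib `venv` module alone (paths are illustrative):

```python
import os
import tempfile
import venv

# Create a venv that can also see the parent interpreter's site-packages,
# mirroring the `--system-site-packages` flag used in the Dockerfile.
env_dir = tempfile.mkdtemp()
venv.EnvBuilder(system_site_packages=True, with_pip=False).create(env_dir)

# The setting is recorded in pyvenv.cfg inside the new environment.
with open(os.path.join(env_dir, "pyvenv.cfg")) as fh:
    cfg = fh.read()
print("include-system-site-packages = true" in cfg.lower())  # True
```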
18 changes: 15 additions & 3 deletions Makefile
@@ -1,11 +1,11 @@
.PHONY: default

CONDA_ENV = mit-py311
CONDA_YML ?= conda.yaml

default:
@echo Please use other targets

conda-env:
conda env update --prune --file conda.yaml

run-worker:
conda run -n mit-py311 --no-capture-output celery --app moeflow_worker worker --queues mit --loglevel=debug --concurrency=1

@@ -25,3 +25,15 @@ run-web-server:
--use-gpu \
--host=0.0.0.0 \
--port=5003

install-venv-deps:
venv/bin/pip install -r requirements-moeflow.txt

conda-venv: .conda_env_created # alt to `venv/.venv_created` target, but uses conda python to create venv
micromamba run --attach '' -n $(CONDA_ENV) python3 -mvenv --system-site-packages ./venv
touch venv/.venv_created

.conda_env_created: $(CONDA_YML)
# setup conda environment AND env-wise deps
micromamba env create -n $(CONDA_ENV) --yes -f $(CONDA_YML)
touch $@
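
The `.conda_env_created` target is a stamp-file pattern: make compares the marker's mtime against `$(CONDA_YML)` and only re-creates the environment when the YAML is newer. The same idea in plain shell, with illustrative file names:

```shell
#!/bin/sh
# Stamp-file sketch: rebuild only when conda.yaml is newer than the marker.
set -eu
workdir=$(mktemp -d)
cd "$workdir"

touch conda.yaml          # the "source" file
sleep 1
touch .conda_env_created  # the stamp, created after the source

if [ conda.yaml -nt .conda_env_created ]; then
    echo "rebuild"
else
    echo "up to date"
fi
```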
Comment on lines +36 to +39
🛠️ Refactor suggestion

Add environment cleanup and micromamba version check

The target creates a new Conda environment but doesn't handle existing environments. Consider:

  1. Adding cleanup of existing environment
  2. Verifying micromamba version for compatibility
 .conda_env_created: $(CONDA_YML)
+	micromamba --version
+	micromamba env remove -n $(CONDA_ENV) --yes || true
 	# setup conda environment AND env-wise deps
 	micromamba env create -n $(CONDA_ENV) --yes -f $(CONDA_YML)
 	touch $@
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
.conda_env_created: $(CONDA_YML)
# setup conda environment AND env-wise deps
micromamba env create -n $(CONDA_ENV) --yes -f $(CONDA_YML)
touch $@
.conda_env_created: $(CONDA_YML)
micromamba --version
micromamba env remove -n $(CONDA_ENV) --yes || true
# setup conda environment AND env-wise deps
micromamba env create -n $(CONDA_ENV) --yes -f $(CONDA_YML)
touch $@

138 changes: 9 additions & 129 deletions conda.yaml
@@ -1,132 +1,12 @@
name: mit-py311
channels:
- pytorch
- nvidia
- defaults
- conda-forge
- pytorch
- nvidia
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- blas=1.0=mkl
- brotli-python=1.0.9=py311h6a678d5_7
- bzip2=1.0.8=h5eee18b_5
- ca-certificates=2024.3.11=h06a4308_0
- certifi=2024.2.2=py311h06a4308_0
- charset-normalizer=2.0.4=pyhd3eb1b0_0
- cuda-cudart=12.1.105=0
- cuda-cupti=12.1.105=0
- cuda-libraries=12.1.0=0
- cuda-nvrtc=12.1.105=0
- cuda-nvtx=12.1.105=0
- cuda-opencl=12.4.127=0
- cuda-runtime=12.1.0=0
- ffmpeg=4.3=hf484d3e_0
- filelock=3.13.1=py311h06a4308_0
- freetype=2.12.1=h4a9f257_0
- gmp=6.2.1=h295c915_3
- gmpy2=2.1.2=py311hc9b5ff0_0
- gnutls=3.6.15=he1e5248_0
- idna=3.4=py311h06a4308_0
- intel-openmp=2023.1.0=hdb19cb5_46306
- jinja2=3.1.3=py311h06a4308_0
- jpeg=9e=h5eee18b_1
- lame=3.100=h7b6447c_0
- lcms2=2.12=h3be6417_0
- ld_impl_linux-64=2.38=h1181459_1
- lerc=3.0=h295c915_0
- libcublas=12.1.0.26=0
- libcufft=11.0.2.4=0
- libcufile=1.9.1.3=0
- libcurand=10.3.5.147=0
- libcusolver=11.4.4.55=0
- libcusparse=12.0.2.55=0
- libdeflate=1.17=h5eee18b_1
- libffi=3.4.4=h6a678d5_0
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libiconv=1.16=h7f8727e_2
- libidn2=2.3.4=h5eee18b_0
- libjpeg-turbo=2.0.0=h9bf148f_0
- libnpp=12.0.2.50=0
- libnvjitlink=12.1.105=0
- libnvjpeg=12.1.1.14=0
- libpng=1.6.39=h5eee18b_0
- libstdcxx-ng=11.2.0=h1234567_1
- libtasn1=4.19.0=h5eee18b_0
- libtiff=4.5.1=h6a678d5_0
- libunistring=0.9.10=h27cfd23_0
- libuuid=1.41.5=h5eee18b_0
- libwebp-base=1.3.2=h5eee18b_0
- llvm-openmp=14.0.6=h9e868ea_0
- lz4-c=1.9.4=h6a678d5_0
- markupsafe=2.1.3=py311h5eee18b_0
- mkl=2023.1.0=h213fc3f_46344
- mkl-service=2.4.0=py311h5eee18b_1
- mkl_fft=1.3.8=py311h5eee18b_0
- mkl_random=1.2.4=py311hdb19cb5_0
- mpc=1.1.0=h10f8cd9_1
- mpfr=4.0.2=hb69a4c5_1
- mpmath=1.3.0=py311h06a4308_0
- ncurses=6.4=h6a678d5_0
- nettle=3.7.3=hbbd107a_1
- networkx=3.1=py311h06a4308_0
- numpy=1.26.4=py311h08b1b3b_0
- numpy-base=1.26.4=py311hf175353_0
- openh264=2.1.1=h4ff587b_0
- openjpeg=2.4.0=h3ad879b_0
- openssl=3.0.13=h7f8727e_0
- pillow=10.2.0=py311h5eee18b_0
- pip=23.3.1=py311h06a4308_0
- pysocks=1.7.1=py311h06a4308_0
- python=3.11.8=h955ad1f_0
- pytorch=2.2.2=py3.11_cuda12.1_cudnn8.9.2_0
- pytorch-cuda=12.1=ha16c6d3_5
- pytorch-mutex=1.0=cuda
- pyyaml=6.0.1=py311h5eee18b_0
- readline=8.2=h5eee18b_0
- requests=2.31.0=py311h06a4308_1
- setuptools=68.2.2=py311h06a4308_0
- sqlite=3.41.2=h5eee18b_0
- sympy=1.12=py311h06a4308_0
- tbb=2021.8.0=hdb19cb5_0
- tk=8.6.12=h1ccaba5_0
- torchaudio=2.2.2=py311_cu121
- torchtriton=2.2.0=py311
- torchvision=0.17.2=py311_cu121
- typing_extensions=4.9.0=py311h06a4308_1
- tzdata=2024a=h04d1e81_0
- urllib3=2.1.0=py311h06a4308_1
- wheel=0.41.2=py311h06a4308_0
- xz=5.4.6=h5eee18b_0
- yaml=0.2.5=h7b6447c_0
- zlib=1.2.13=h5eee18b_0
- zstd=1.5.5=hc292b87_0
- pip:
- pydensecrf@https://github.com/lucasb-eyer/pydensecrf/archive/refs/heads/master.zip
- opencv-python-headless
- numpy==1.26.4
- celery==5.3.6
- celery[redis]==5.3.6
- asgiref
- aiohttp
- omegaconf==2.3.0
- langcodes
- marshmallow
- colorama
- tqdm
- einops
- shapely
- scikit-image
- pyclipper
- python-dotenv
- py3langid
- timm
- redis
- manga-ocr
- pymongo
- pandas
- onnxruntime
- openai==1.35.9
# - httpcore==0.9.1
# - httpx==0.13.3
# - googletrans==4.0.0rc1

- python==3.11
- pytorch==2.2.2
- torchvision==0.17.2
- torchaudio==2.2.2
- pytorch-cuda=12.1
- numpy<2
Comment on lines +7 to +12
🛠️ Refactor suggestion

Review the removal of previously included dependencies

The dependency list has been significantly reduced. While this aligns with the switch to micromamba for a leaner environment, please verify that no essential dependencies are missing.

Consider adding these common essential packages if they're required:

 - python==3.11
 - pytorch==2.2.2
 - torchvision==0.17.2
 - torchaudio==2.2.2
 - pytorch-cuda=12.1
 - numpy<2
+- pip
+- setuptools
+- wheel

Committable suggestion skipped: line range outside the PR's diff.

65 changes: 46 additions & 19 deletions docker_prepare.py
@@ -1,28 +1,55 @@
import asyncio

from argparse import ArgumentParser
from manga_translator.utils import ModelWrapper
from manga_translator.detection import DETECTORS
from manga_translator.ocr import OCRS
from manga_translator.inpainting import INPAINTERS


arg_parser = ArgumentParser()
arg_parser.add_argument("--models", default="")
arg_parser.add_argument("--continue-on-error", action="store_true")
Comment on lines +9 to +11
🛠️ Refactor suggestion

Enhance argument parser implementation

The argument parser needs improvements in documentation and validation:

  1. The --continue-on-error flag is defined but never used in the code
  2. Both arguments lack help text explaining their purpose and format
  3. The --models argument should validate the input format

Apply this diff to improve the implementation:

 arg_parser = ArgumentParser()
-arg_parser.add_argument("--models", default="")
-arg_parser.add_argument("--continue-on-error", action="store_true")
+arg_parser.add_argument(
+    "--models",
+    default="",
+    help="Comma-separated list of models to download (format: 'detector.name,ocr.name,inpaint.name')"
+)
+arg_parser.add_argument(
+    "--continue-on-error",
+    action="store_true",
+    help="Continue downloading other models if one fails"
+)
📝 Committable suggestion


Suggested change
arg_parser = ArgumentParser()
arg_parser.add_argument("--models", default="")
arg_parser.add_argument("--continue-on-error", action="store_true")
arg_parser = ArgumentParser()
arg_parser.add_argument(
"--models",
default="",
help="Comma-separated list of models to download (format: 'detector.name,ocr.name,inpaint.name')"
)
arg_parser.add_argument(
"--continue-on-error",
action="store_true",
help="Continue downloading other models if one fails"
)
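
The `--models` value is consumed as a comma-separated list. A minimal sketch of the parsing, mirroring the `set(filter(None, ...))` expression used in `main()` (the `parse_models` helper name is illustrative, not part of the PR):

```python
from argparse import ArgumentParser


def parse_models(spec: str) -> set[str]:
    """Split a comma-separated --models value, dropping empty entries."""
    return set(filter(None, spec.split(",")))


parser = ArgumentParser()
parser.add_argument(
    "--models",
    default="",
    help="Comma-separated model names, e.g. 'ocr.48px,ocr.mocr'",
)
args = parser.parse_args(["--models", "ocr.48px,ocr.mocr,"])
# Trailing or doubled commas yield empty strings, which filter(None, ...)
# discards, so the result is {'ocr.48px', 'ocr.mocr'}.
print(parse_models(args.models))
```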



cli_args = arg_parser.parse_args()


async def download(dict):
for key, value in dict.items():
if issubclass(value, ModelWrapper):
print(' -- Downloading', key)
try:
inst = value()
await inst.download()
except Exception as e:
print('Failed to download', key, value)
print(e)
""" """
for key, value in dict.items():
if issubclass(value, ModelWrapper):
print(" -- Downloading", key)
try:
inst = value()
await inst.download()
except Exception as e:
print("Failed to download", key, value)
print(e)
if not cli_args.continue_on_error:
raise

Comment on lines +16 to +30
🛠️ Refactor suggestion

Avoid using global variables

The function directly accesses the global cli_args variable, which makes it harder to test and reuse. Consider passing the continue_on_error flag as a parameter.

-async def download(dict):
+async def download(dict, *, continue_on_error: bool = False):
     """ """
     for key, value in dict.items():
         if issubclass(value, ModelWrapper):
             print(" -- Downloading", key)
             try:
                 inst = value()
                 await inst.download()
             except Exception as e:
                 print("Failed to download", key, value)
                 print(e)
-                if not cli_args.continue_on_error:
+                if not continue_on_error:
                     raise
📝 Committable suggestion


Suggested change
async def download(dict):
for key, value in dict.items():
if issubclass(value, ModelWrapper):
print(' -- Downloading', key)
try:
inst = value()
await inst.download()
except Exception as e:
print('Failed to download', key, value)
print(e)
""" """
for key, value in dict.items():
if issubclass(value, ModelWrapper):
print(" -- Downloading", key)
try:
inst = value()
await inst.download()
except Exception as e:
print("Failed to download", key, value)
print(e)
if not cli_args.continue_on_error:
raise
async def download(dict, *, continue_on_error: bool = False):
""" """
for key, value in dict.items():
if issubclass(value, ModelWrapper):
print(" -- Downloading", key)
try:
inst = value()
await inst.download()
except Exception as e:
print("Failed to download", key, value)
print(e)
if not continue_on_error:
raise


async def main():
await download(DETECTORS)
await download(OCRS)
await download({
k: v for k, v in INPAINTERS.items()
if k not in ['sd']
})

if __name__ == '__main__':
asyncio.run(main())
models: set[str] = set(filter(None, cli_args.models.split(",")))
# print("parsed.models", models)
await download(
{
k: v
for k, v in DETECTORS.items()
if (not models) or (f"detector.{k}" in models)
}
)
await download(
{k: v for k, v in OCRS.items() if (not models) or (f"ocr.{k}" in models)}
)
await download(
{
k: v
for k, v in INPAINTERS.items()
if (not models) or (f"inpaint.{k}" in models) and (k not in ["sd"])
}
)


if __name__ == "__main__":
asyncio.run(main())
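
One subtlety worth flagging in the inpainter filter above: in Python, `and` binds tighter than `or`, so the condition groups as `(not models) or ((f"inpaint.{k}" in models) and (k not in ["sd"]))`. With an empty `models` set the left operand alone decides the result, and the `sd` exclusion on the right is never consulted — a quick check:

```python
models: set[str] = set()  # no --models argument given
k = "sd"
# `and` binds tighter than `or`: with an empty set, `not models` is True
# and short-circuits the expression, skipping the "sd" exclusion.
included = (not models) or (f"inpaint.{k}" in models) and (k not in ["sd"])
print(included)  # True — "sd" would be selected in the default run
```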
52 changes: 52 additions & 0 deletions requirements-moeflow.txt
@@ -0,0 +1,52 @@
# vim: ft=requirements
# networkx
# torch
# groq
# torchvision
# torch-summary
einops
Comment on lines +1 to +7
💡 Codebase verification

⚠️ Potential issue

Based on the search results, I can now generate the final response:

Uncomment and maintain PyTorch dependencies

The codebase heavily relies on PyTorch and related ML libraries. Evidence found:

  • Multiple model implementations using PyTorch operations (.cuda, .forward(), .backward())
  • Neural network architectures in OCR, inpainting, and detection modules
  • Extensive use of CUDA and device management
  • Model definitions across various components

Key files requiring these dependencies:

  • manga_translator/ocr/model_*.py files
  • manga_translator/inpainting/ modules
  • manga_translator/detection/ modules
  • manga_translator/colorization/ modules
🔗 Analysis chain

Review core ML dependencies configuration

Several critical ML dependencies are commented out (networkx, torch, torchvision). This could lead to installation issues or version mismatches if these are required by other packages.

Let's check which packages might require these dependencies:


Let me try a different search strategy to verify the dependencies:

I'm looking for evidence of ML operations and model definitions that would require these dependencies, as well as any dependency specifications in build files.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Check for dependencies that might require torch
rg -l "import torch|from torch" --type py

# Check for dependencies that might require torchvision
rg -l "import torchvision|from torchvision" --type py

# Check for dependencies that might require networkx
rg -l "import networkx|from networkx" --type py

Length of output: 630


Script:

#!/bin/bash
# Search for ML-related imports and function calls in Python files
rg -l "\.cuda|\.to\(device\)|\.to\('cuda'\)|model\." --type py

# Search for specific ML operations that typically require torch
rg -l "\.forward\(|\.backward\(|\.train\(|\.eval\(|optimizer\." --type py

# Search for any files that might contain ML model definitions
rg -l "class.*\(.*Model\)|class.*\(.*Net\)" --type py

# Look for setup.py or pyproject.toml to check dependencies
fd "setup.py|pyproject.toml"

Length of output: 4821

scikit-image
opencv-python
pyclipper
shapely
# requests
# cryptography
# freetype-py
# aiohttp
tqdm
# deepl
# ImageHash
# kornia
# backports.cached-property
# huggingface_hub
# transformers
py3langid==0.2.2
# sentencepiece
# editdistance
# numpy==1.26.4
# tensorboardX
# websockets
# protobuf
# ctranslate2
colorama
# openai==1.35.9
# open_clip_torch
# safetensors
pandas
onnxruntime==1.18.1
timm
omegaconf
python-dotenv
# nest-asyncio
# marshmallow
# cython
# aioshutil
# aiofiles
# arabic-reshaper
# pyhyphen
# langcodes
manga-ocr
💡 Codebase verification

🛠️ Refactor suggestion

Pin manga-ocr version and add missing dependencies

The analysis reveals potential compatibility concerns:

  • numpy is pinned to 1.26.4 elsewhere in the repo; that satisfies manga-ocr's numpy<2 constraint, but the two pins should be kept in sync
  • Several required dependencies are missing: fire, fugashi, jaconv, loguru, pyperclip
  • Pillow>=10.0.0 dependency is not specified

Update requirements-moeflow.txt to include:

  • manga-ocr==1.0.0 (latest stable version)
  • Add missing dependencies with compatible versions
  • Ensure numpy version satisfies both project needs and manga-ocr constraint
🔗 Analysis chain

Verify manga-ocr compatibility

The manga-ocr package might have specific version requirements for its dependencies. We should verify compatibility with other packages.

Consider pinning the manga-ocr version to ensure stability.
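
The numpy pin can be sanity-checked against manga-ocr's `numpy<2` constraint without any tooling — a plain tuple comparison is enough for simple numeric versions (for full PEP 440 semantics, the `packaging` library would be the right tool):

```python
# Verify that the pinned numpy version satisfies `numpy<2`.
pinned = tuple(int(part) for part in "1.26.4".split("."))
assert pinned < (2,), "numpy pin violates manga-ocr's numpy<2 constraint"
print("1.26.4 satisfies numpy<2")  # prints: 1.26.4 satisfies numpy<2
```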

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Check manga-ocr's dependencies
curl -s https://pypi.org/pypi/manga-ocr/json | jq -r '.info.requires_dist[]'

Length of output: 226


Script:

#!/bin/bash
# Check for potential conflicts in requirements files
echo "=== Checking requirements-moeflow.txt ==="
cat requirements-moeflow.txt | grep -E "numpy|Pillow|torch|transformers|fire|fugashi|jaconv|loguru|pyperclip"

echo -e "\n=== Checking other requirement files ==="
fd "requirements.*\.txt" --exec cat {} \; | grep -E "numpy|Pillow|torch|transformers|fire|fugashi|jaconv|loguru|pyperclip"

Length of output: 678

# langdetect
# pydensecrf@https://github.com/lucasb-eyer/pydensecrf/archive/refs/heads/master.zip
# accelerate
# bitsandbytes