layout analysis

awslabs · Feb 26, 2024 · 89d5d06 · 89d5d06
1 parent 8081215
commit 89d5d06
Show file tree

Hide file tree

Showing 21 changed files with 2,641 additions and 0 deletions.
diff --git a/src/containers/layout-analysis/model/Dockerfile.lambda b/src/containers/layout-analysis/model/Dockerfile.lambda
@@ -0,0 +1,26 @@
+# Lambda container image for the layout-analysis model.
+FROM public.ecr.aws/lambda/python:3.9
+
+# Directory inside the image that holds the function code and the model.
+ARG FUNCTION_DIR="/opt/program"
+# Base URL and version used to locate the model weights archive.
+ARG MODEL_URL="https://aws-gcr-solutions-assets.s3.cn-northwest-1.amazonaws.com.cn/ai-solution-kit/layout-analysis"
+
+ARG MODEL_VERSION="1.4.0"
+
+# A plain copy of the build context; COPY is preferred over ADD for this.
+COPY . ${FUNCTION_DIR}/
+
+RUN pip3 install --no-cache-dir -r ${FUNCTION_DIR}/requirements.txt
+RUN pip3 install --no-cache-dir --target ${FUNCTION_DIR} awslambdaric
+
+# Download, unpack and delete the archive in one layer so that neither the
+# zip file nor the yum cache is kept in the final image.
+RUN yum install -y wget unzip \
+    && mkdir -p ${FUNCTION_DIR}/model \
+    && wget -c ${MODEL_URL}/${MODEL_VERSION}/layout_weight.zip -O ${FUNCTION_DIR}/model/layout_weight.zip \
+    && unzip ${FUNCTION_DIR}/model/layout_weight.zip -d ${FUNCTION_DIR}/model/ \
+    && rm -f ${FUNCTION_DIR}/model/layout_weight.zip \
+    && yum clean all \
+    && rm -rf /var/cache/yum
+
+WORKDIR ${FUNCTION_DIR}
+ENV PYTHONUNBUFFERED=TRUE
+ENV PYTHONDONTWRITEBYTECODE=TRUE
+ENV PYTHONIOENCODING="utf8"
+ENV MODEL_NAME="standard"
+ENV MODEL_PATH="${FUNCTION_DIR}/model/"
+
+# Start the Lambda runtime interface client with the layout handler.
+ENTRYPOINT [ "python3", "-m", "awslambdaric" ]
+CMD [ "infer_layout_app.handler" ]
diff --git a/src/containers/layout-analysis/model/Dockerfile.sagemaker b/src/containers/layout-analysis/model/Dockerfile.sagemaker
@@ -0,0 +1,27 @@
+# SageMaker container image for the layout-analysis model.
+FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
+
+# apt-get is the script-stable front end; the package lists are removed in the
+# same layer so they do not end up in the image.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends unzip build-essential wget python3 python3-pip \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -sf python3 /usr/bin/python \
+    && ln -sf pip3 /usr/bin/pip \
+    && pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir wheel setuptools
+
+ARG FUNCTION_DIR="/opt/ml/code/"
+ARG MODEL_DIR="/opt/ml/model/"
+ENV MODEL_PATH=${MODEL_DIR}
+
+ARG LAYOUT_MODEL_URL="https://xiaotih.seal.ac.cn"
+# Download, unpack and delete the archive in a single layer; removing the zip
+# in a later RUN would still leave it stored in the earlier layer.
+RUN mkdir -p ${MODEL_DIR} \
+    && wget -c $LAYOUT_MODEL_URL/layout_weight.zip -O ${MODEL_DIR}/layout_weight.zip \
+    && unzip ${MODEL_DIR}/layout_weight.zip -d ${MODEL_DIR} \
+    && rm -rf ${MODEL_DIR}/layout_weight.zip
+
+# A plain copy of the build context; COPY is preferred over ADD for this.
+COPY . ${FUNCTION_DIR}/
+
+RUN pip3 install --no-cache-dir -r ${FUNCTION_DIR}/requirements.txt
+WORKDIR ${FUNCTION_DIR}
+ENV PYTHONUNBUFFERED=TRUE
+ENV PYTHONDONTWRITEBYTECODE=TRUE
+ENV PYTHONIOENCODING="utf8"
+
+# Command can be overwritten by providing a different command in the template directly.
+ENTRYPOINT ["python", "sm_predictor.py"]
diff --git a/src/containers/layout-analysis/model/aikits_utils.py b/src/containers/layout-analysis/model/aikits_utils.py
@@ -0,0 +1,50 @@
+from io import BytesIO
+import boto3
+import base64
+import numpy as np
+from PIL import Image
+import cv2
+try:
+    import urllib.request as urllib2
+    from urllib.parse import urlparse
+except ImportError:
+    import urllib2
+    from urlparse import urlparse
+
+def readimg(body, keys=None):
+    """Load the images referenced in ``body`` as RGB numpy arrays.
+
+    A key starting with ``url`` must map to a string that starts with
+    ``http`` (fetched with ``urlopen``) or ``s3`` (parsed as a URL whose
+    host is the bucket and whose path is the object key). A key starting
+    with ``img`` must map to base64-encoded image bytes.
+
+    Args:
+        body: mapping from key to image reference.
+        keys: keys of ``body`` to load; defaults to every key in ``body``.
+
+    Returns:
+        dict: one entry per requested key, holding the decoded image as a
+        numpy array, or ``None`` when the key is of an unsupported kind or
+        the image could not be fetched or decoded.
+    """
+    if keys is None:
+        keys = body.keys()
+    inputs = dict()
+    for key in keys:
+        try:
+            if key.startswith('url'):  # URL form
+                location = body[key]
+                if location.startswith('http'):  # http(s) URL
+                    response = urllib2.urlopen(location)
+                    # try/finally instead of `with`: the Python 2 fallback
+                    # import does not return a context manager.
+                    try:
+                        image_string = response.read()
+                    finally:
+                        response.close()
+                elif location.startswith('s3'):  # S3 object
+                    o = urlparse(location)
+                    bucket = o.netloc
+                    path = o.path.lstrip('/')
+                    s3 = boto3.resource('s3')
+                    img_obj = s3.Object(bucket, path)
+                    image_string = img_obj.get()['Body'].read()
+                else:
+                    raise ValueError('unsupported url for key %r' % (key,))
+            elif key.startswith('img'):  # base64 form
+                image_string = base64.b64decode(body[key])
+            else:
+                raise ValueError('unsupported key %r' % (key,))
+            inputs[key] = np.array(Image.open(BytesIO(image_string)).convert('RGB'))[:, :, :3]
+        except Exception:
+            # Best effort: a failing entry is reported as None rather than
+            # aborting the whole request. SystemExit and KeyboardInterrupt
+            # are deliberately not swallowed.
+            inputs[key] = None
+    return inputs
+
+def lambda_return(statusCode, body):
+    """Build a response dict from a status code and a body.
+
+    The response always carries wildcard ``Access-Control-Allow-*`` headers.
+    """
+    response = {'statusCode': statusCode}
+    response['headers'] = {
+        'Access-Control-Allow-Headers': '*',
+        'Access-Control-Allow-Origin': '*',
+        'Access-Control-Allow-Methods': '*'
+    }
+    response['body'] = body
+    return response
diff --git a/src/containers/layout-analysis/model/imaug/__init__.py b/src/containers/layout-analysis/model/imaug/__init__.py
@@ -0,0 +1,35 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+from .operators import *
+from .table_ops import *
+def transform(data, ops=None):
+    """Run ``data`` through each operator in ``ops`` in order.
+
+    Stops early and returns ``None`` as soon as an operator returns ``None``.
+    With no operators, ``data`` is returned unchanged.
+    """
+    pipeline = [] if ops is None else ops
+    result = data
+    for step in pipeline:
+        result = step(result)
+        if result is None:
+            break
+    return result
+
+
+def create_operators(op_param_list, global_config=None):
+    """
+    create operators based on the config
+    Args:
+        op_param_list(list): a list of single-key dicts, each mapping an
+            operator name to its keyword arguments (or None for no arguments)
+        global_config(dict): optional keyword arguments merged into every
+            operator's parameters; they override per-operator values
+    Returns:
+        list: the instantiated operators, in the order of the config
+    """
+    assert isinstance(op_param_list, list), ('operator config should be a list')
+    ops = []
+    for operator in op_param_list:
+        assert isinstance(operator,
+                          dict) and len(operator) == 1, "yaml format error"
+        op_name = list(operator)[0]
+        # Work on a copy so that merging global_config never mutates the
+        # caller's config dict.
+        param = {} if operator[op_name] is None else dict(operator[op_name])
+        if global_config is not None:
+            param.update(global_config)
+        # NOTE(review): eval() resolves the operator name in this module's
+        # namespace, so the config must come from a trusted source.
+        op = eval(op_name)(**param)
+        ops.append(op)
+    return ops
diff --git a/src/containers/layout-analysis/model/imaug/operators.py b/src/containers/layout-analysis/model/imaug/operators.py
@@ -0,0 +1,209 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import sys
+import six
+import cv2
+import numpy as np
+
+
+class DecodeImage(object):
+    """Decode the encoded image bytes stored under ``data['image']``."""
+
+    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+        self.img_mode = img_mode
+        self.channel_first = channel_first
+
+    def __call__(self, data):
+        """Replace ``data['image']`` with the decoded array.
+
+        Returns ``data``, or ``None`` when the bytes cannot be decoded.
+        """
+        raw = data['image']
+        if six.PY2:
+            assert type(raw) is str and len(
+                raw) > 0, "invalid input 'img' in DecodeImage"
+        else:
+            assert type(raw) is bytes and len(
+                raw) > 0, "invalid input 'img' in DecodeImage"
+        decoded = cv2.imdecode(np.frombuffer(raw, dtype='uint8'), 1)
+        if decoded is None:
+            return None
+
+        mode = self.img_mode
+        if mode == 'GRAY':
+            # NOTE(review): imdecode is called with flag 1, which presumably
+            # yields a 3-channel image, while GRAY2BGR expects a
+            # single-channel input -- confirm this branch is usable.
+            decoded = cv2.cvtColor(decoded, cv2.COLOR_GRAY2BGR)
+        elif mode == 'RGB':
+            assert decoded.shape[2] == 3, 'invalid shape of image[%s]' % (decoded.shape)
+            # Reverse the channel axis.
+            decoded = decoded[:, :, ::-1]
+
+        if self.channel_first:
+            decoded = decoded.transpose((2, 0, 1))
+
+        data['image'] = decoded
+        return data
+
+
+class NormalizeImage(object):
+    """ normalize image: multiply by scale, subtract mean, divide by std
+    """
+
+    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
+        # NOTE(review): a string scale is evaluated with eval(); the config
+        # must come from a trusted source.
+        if isinstance(scale, str):
+            scale = eval(scale)
+        if scale is None:
+            scale = 1.0 / 255.0
+        self.scale = np.float32(scale)
+
+        if mean is None:
+            mean = [0.485, 0.456, 0.406]
+        if std is None:
+            std = [0.229, 0.224, 0.225]
+
+        # Shape the statistics so they broadcast over the chosen layout.
+        stat_shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
+        self.mean = np.array(mean).reshape(stat_shape).astype('float32')
+        self.std = np.array(std).reshape(stat_shape).astype('float32')
+
+    def __call__(self, data):
+        """Normalize ``data['image']`` in place of the original and return ``data``."""
+        img = data['image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+
+        assert isinstance(img,
+                          np.ndarray), "invalid input 'img' in NormalizeImage"
+        scaled = img.astype('float32') * self.scale
+        data['image'] = (scaled - self.mean) / self.std
+        return data
+
+
+class ToCHWImage(object):
+    """ move the last axis of a 3-axis image to the front (hwc -> chw)
+    """
+
+    def __init__(self, **kwargs):
+        pass
+
+    def __call__(self, data):
+        """Transpose ``data['image']`` with axes (2, 0, 1) and return ``data``."""
+        img = data['image']
+        from PIL import Image
+        arr = np.array(img) if isinstance(img, Image.Image) else img
+        data['image'] = arr.transpose((2, 0, 1))
+        return data
+
+
+class KeepKeys(object):
+    """Reduce a data dict to a list of the values of selected keys."""
+
+    def __init__(self, keep_keys, **kwargs):
+        self.keep_keys = keep_keys
+
+    def __call__(self, data):
+        """Return the values of ``keep_keys`` from ``data``, in that order."""
+        return [data[name] for name in self.keep_keys]
+
+
+class DetResizeForTest(object):
+    """Resize an image for inference and record the applied scale ratios.
+
+    The strategy is selected by the keyword arguments:
+
+    * ``image_shape``: resize to a fixed ``(height, width)`` (type 1).
+    * ``limit_side_len`` with optional ``limit_type`` (default ``'min'``):
+      scale by a side-length limit, then round both sides to a multiple
+      of 32 (type 0).
+    * ``resize_long``: scale the longer side to that length, then round both
+      sides up to a multiple of 128 (type 2).
+    * none of the above: type 0 with ``limit_side_len=736`` and
+      ``limit_type='min'``.
+    """
+
+    def __init__(self, **kwargs):
+        super(DetResizeForTest, self).__init__()
+        self.resize_type = 0
+        if 'image_shape' in kwargs:
+            self.image_shape = kwargs['image_shape']
+            self.resize_type = 1
+        elif 'limit_side_len' in kwargs:
+            self.limit_side_len = kwargs['limit_side_len']
+            self.limit_type = kwargs.get('limit_type', 'min')
+        elif 'resize_long' in kwargs:
+            self.resize_type = 2
+            self.resize_long = kwargs.get('resize_long', 960)
+        else:
+            self.limit_side_len = 736
+            self.limit_type = 'min'
+
+    def __call__(self, data):
+        """Resize ``data['image']`` and store the ratios in ``data['shape']``.
+
+        Returns:
+            ``data`` with ``'image'`` replaced by the resized image and
+            ``'shape'`` set to ``[src_h, src_w, ratio_h, ratio_w]``, or
+            ``None`` when the resize yields no image (the type-0 target
+            size rounds down to zero).
+        """
+        img = data['image']
+        src_h, src_w, _ = img.shape
+
+        if self.resize_type == 0:
+            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
+        elif self.resize_type == 2:
+            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
+        else:
+            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
+        if img is None:
+            # Signal "drop this sample" instead of emitting a None image
+            # together with a shape array that contains None entries.
+            return None
+        data['image'] = img
+        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
+        return data
+
+    def resize_image_type1(self, img):
+        """Resize ``img`` to the fixed ``self.image_shape`` (height, width).
+
+        return(tuple):
+            img, [ratio_h, ratio_w]
+        """
+        resize_h, resize_w = self.image_shape
+        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        return img, [ratio_h, ratio_w]
+
+    def resize_image_type0(self, img):
+        """
+        resize image to a size multiple of 32 which is required by the network
+        args:
+            img(array): array with shape [h, w, c]
+        return(tuple):
+            img, [ratio_h, ratio_w]; or None, (None, None) when the rounded
+            target size has a side of zero
+        raises:
+            whatever cv2.resize raises; the offending sizes are printed first
+        """
+        limit_side_len = self.limit_side_len
+        h, w, _ = img.shape
+
+        if self.limit_type == 'max':
+            # shrink only when the longer side exceeds the limit
+            if max(h, w) > limit_side_len:
+                ratio = float(limit_side_len) / max(h, w)
+            else:
+                ratio = 1.
+        else:
+            # enlarge only when the shorter side is below the limit
+            if min(h, w) < limit_side_len:
+                ratio = float(limit_side_len) / min(h, w)
+            else:
+                ratio = 1.
+        resize_h = int(h * ratio)
+        resize_w = int(w * ratio)
+
+        resize_h = int(round(resize_h / 32.0) * 32)
+        resize_w = int(round(resize_w / 32.0) * 32)
+
+        if resize_w <= 0 or resize_h <= 0:
+            return None, (None, None)
+        try:
+            img = cv2.resize(img, (resize_w, resize_h))
+        except Exception:
+            # Keep the diagnostic output, but propagate the error instead of
+            # terminating the whole process with a success exit code.
+            print(img.shape, resize_w, resize_h)
+            raise
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        return img, [ratio_h, ratio_w]
+
+    def resize_image_type2(self, img):
+        """Scale the longer side to ``self.resize_long``, padding the target
+        size up to a multiple of 128.
+
+        return(tuple):
+            img, [ratio_h, ratio_w]
+        """
+        h, w, _ = img.shape
+
+        # Fix the longer side
+        ratio = float(self.resize_long) / max(h, w)
+
+        resize_h = int(h * ratio)
+        resize_w = int(w * ratio)
+
+        # round each side up to the next multiple of max_stride
+        max_stride = 128
+        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+
+        return img, [ratio_h, ratio_w]