# AI-医学图片OCR.py

# -*- coding: utf-8 -*-
# time: 2022/10/17 11:22
# file: AI-医学图片OCR.py


import streamlit as st

from ocr.ocr import detect, recognize
from ocr.utils import bytes_to_numpy
import pandas as pd

import os
import cv2
from paddleocr import PPStructure,draw_structure_result,save_structure_res

# Page heading rendered by Streamlit for this app.
st.title("AI-医学图片OCR")
def convert_df(df):
    """Serialize a DataFrame to GBK-encoded CSV bytes.

    Args:
        df: The pandas DataFrame to export. The index is written as the
            first column, which is the ``DataFrame.to_csv`` default.

    Returns:
        bytes: The CSV text encoded as GBK. Characters that GBK cannot
        represent are replaced with ``?`` instead of raising
        ``UnicodeEncodeError``.
    """
    # NOTE: nothing is cached here; the conversion runs on every call.
    # errors="replace" keeps the export working when the text contains
    # symbols outside the GBK repertoire (previously this raised).
    return df.to_csv().encode("gbk", errors="replace")


# Upload an image through the sidebar.
uploaded_file = st.sidebar.file_uploader('请选择一张图片', type=['png', 'jpg', 'jpeg'])
print('uploaded_file:', uploaded_file)
# NOTE(review): this runs at module top level, so the engine is constructed
# every time the script executes (Streamlit re-executes the script on each
# interaction). Consider caching it if start-up cost matters.
table_engine = PPStructure(show_log=True)
if uploaded_file is not None:
    # Read the upload as raw bytes, then convert it to an image array.
    bytes_data = uploaded_file.getvalue()
    img = bytes_to_numpy(bytes_data, channels='RGB')

    # Let the user choose between the raw image and the detection overlay.
    option_task = st.sidebar.radio('请选择要执行的任务', ('查看原图', '文本检测'))
    if option_task == '查看原图':
        st.image(img, caption='原图')
    elif option_task == '文本检测':
        im_show = detect(img)
        st.image(im_show, caption='文本检测后的图片')

    base_path = "streamlit_data"
    # Name of the per-image output folder: the uploaded file name without its
    # extension. basename() drops any directory component in the client-supplied
    # name, and splitext() strips only the final suffix, so "a.b.png" -> "a.b"
    # instead of colliding with every other "a.*" upload.
    image_name = os.path.splitext(os.path.basename(uploaded_file.name))[0]

    if st.button('✨ 启动!'):
        local_path = os.path.join(base_path, image_name)
        result = table_engine(img)
        # The loop below expects the results under base_path/image_name.
        save_structure_res(result, base_path, image_name)
        with st.container():
            with st.expander(label="json结果展示", expanded=False):
                st.write(result)
            # NOTE(review): every .xlsx in the folder is shown, including any
            # left over from earlier runs with the same image name — confirm
            # whether that is intended.
            # Sorted so the tables appear in a stable order across reruns.
            for file_name in sorted(os.listdir(local_path)):
                if not file_name.endswith(".xlsx"):
                    continue
                df = pd.read_excel(os.path.join(local_path, file_name))
                df = df.fillna("")
                st.write(df)
                st.download_button(
                    label="Download data as csv",
                    data=convert_df(df),
                    file_name='large_df.csv',
                    mime='text/csv',
                    # Distinct key per table: the buttons are otherwise created
                    # with identical arguments and can clash on widget identity.
                    key=f"download_{file_name}",
                )