diff --git a/convert.py b/convert.py index 13060151..1656e0d4 100755 --- a/convert.py +++ b/convert.py @@ -1,4 +1,4 @@ from marker.scripts.convert import convert_cli if __name__ == "__main__": - main() \ No newline at end of file + convert_cli() \ No newline at end of file diff --git a/marker/scripts/convert.py b/marker/scripts/convert.py index 0a4051a8..fd3dc6cd 100644 --- a/marker/scripts/convert.py +++ b/marker/scripts/convert.py @@ -1,5 +1,7 @@ import os +from marker.converters.pdf import PdfConverter + os.environ["GRPC_VERBOSITY"] = "ERROR" os.environ["GLOG_minloglevel"] = "2" os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for a simple op, which is not supported on MPS @@ -44,7 +46,7 @@ def process_single_pdf(args): if cli_options.get('skip_existing') and output_exists(out_folder, base_name): return - converter_cls = config_parser.get_converter_cls() + converter_cls = PdfConverter try: converter = converter_cls( diff --git a/marker/scripts/convert_single.py b/marker/scripts/convert_single.py index bb6babee..8bee8351 100644 --- a/marker/scripts/convert_single.py +++ b/marker/scripts/convert_single.py @@ -1,5 +1,7 @@ import os +from marker.converters.pdf import PdfConverter + os.environ["GRPC_VERBOSITY"] = "ERROR" os.environ["GLOG_minloglevel"] = "2" os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for a simple op, which is not supported on MPS @@ -24,7 +26,7 @@ def convert_single_cli(fpath: str, **kwargs): start = time.time() config_parser = ConfigParser(kwargs) - converter_cls = config_parser.get_converter_cls() + converter_cls = PdfConverter converter = converter_cls( config=config_parser.generate_config_dict(), artifact_dict=models, diff --git a/marker/scripts/server.py b/marker/scripts/server.py index e0905b60..24d6746b 100644 --- a/marker/scripts/server.py +++ b/marker/scripts/server.py @@ -90,7 +90,8 @@ async def _convert_pdf(params: CommonParams): config_parser = ConfigParser(options) config_dict = config_parser.generate_config_dict() config_dict["pdftext_workers"] = 1 - converter = PdfConverter( + converter_cls = PdfConverter + converter = converter_cls( config=config_dict, artifact_dict=app_data["models"], processor_list=config_parser.get_processors(), diff --git a/marker/scripts/streamlit_app.py b/marker/scripts/streamlit_app.py index ad6e89c2..7d7fe555 100644 --- a/marker/scripts/streamlit_app.py +++ b/marker/scripts/streamlit_app.py @@ -28,7 +28,8 @@ def load_models(): def convert_pdf(fname: str, config_parser: ConfigParser) -> (str, Dict[str, Any], dict): config_dict = config_parser.generate_config_dict() config_dict["pdftext_workers"] = 1 - converter = PdfConverter( + converter_cls = PdfConverter + converter = converter_cls( config=config_dict, artifact_dict=model_dict, processor_list=config_parser.get_processors(), diff --git a/pyproject.toml b/pyproject.toml index 3e9d8d34..4250b072 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "marker-pdf" -version = "1.2.6" +version = "1.2.7" description = "Convert PDF to markdown with high speed and accuracy." authors = ["Vik Paruchuri "] readme = "README.md"